perl 2.0 patch 1: removed redundant debugging code in regexp.c
[p5sagit/p5-mst-13.2.git] / perl.man.1
CommitLineData
8d063cd8 1.rn '' }`
13281fa4 2''' $Header: perl.man.1,v 2.0.1.1 88/06/28 16:28:09 root Exp $
8d063cd8 3'''
4''' $Log: perl.man.1,v $
13281fa4 5''' Revision 2.0.1.1 88/06/28 16:28:09 root
6''' patch1: fixed some quotes
7''' patch1: clarified syntax of LIST
8'''
378cc40b 9''' Revision 2.0 88/06/05 00:09:23 root
10''' Baseline version 2.0.
8d063cd8 11'''
12'''
13.de Sh
14.br
15.ne 5
16.PP
17\fB\\$1\fR
18.PP
19..
20.de Sp
21.if t .sp .5v
22.if n .sp
23..
24.de Ip
25.br
26.ie \\n.$>=3 .ne \\$3
27.el .ne 3
28.IP "\\$1" \\$2
29..
30'''
31''' Set up \*(-- to give an unbreakable dash;
32''' string Tr holds user defined translation string.
33''' Bell System Logo is used as a dummy character.
34'''
378cc40b 35.tr \(*W-|\(bv\*(Tr
8d063cd8 36.ie n \{\
378cc40b 37.ds -- \(*W-
38.if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
39.if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
8d063cd8 40.ds L" ""
41.ds R" ""
42.ds L' '
43.ds R' '
44'br\}
45.el\{\
46.ds -- \(em\|
47.tr \*(Tr
48.ds L" ``
49.ds R" ''
50.ds L' `
51.ds R' '
52'br\}
53.TH PERL 1 LOCAL
54.SH NAME
55perl - Practical Extraction and Report Language
56.SH SYNOPSIS
57.B perl [options] filename args
58.SH DESCRIPTION
59.I Perl
60is an interpreted language optimized for scanning arbitrary text files,
61extracting information from those text files, and printing reports based
62on that information.
63It's also a good language for many system management tasks.
64The language is intended to be practical (easy to use, efficient, complete)
65rather than beautiful (tiny, elegant, minimal).
66It combines (in the author's opinion, anyway) some of the best features of C,
67\fIsed\fR, \fIawk\fR, and \fIsh\fR,
68so people familiar with those languages should have little difficulty with it.
69(Language historians will also note some vestiges of \fIcsh\fR, Pascal, and
70even BASIC-PLUS.)
71Expression syntax corresponds quite closely to C expression syntax.
72If you have a problem that would ordinarily use \fIsed\fR
73or \fIawk\fR or \fIsh\fR, but it
74exceeds their capabilities or must run a little faster,
75and you don't want to write the silly thing in C, then
76.I perl
77may be for you.
78There are also translators to turn your sed and awk scripts into perl scripts.
79OK, enough hype.
80.PP
81Upon startup,
82.I perl
83looks for your script in one of the following places:
84.Ip 1. 4 2
85Specified line by line via
86.B \-e
87switches on the command line.
88.Ip 2. 4 2
89Contained in the file specified by the first filename on the command line.
90(Note that systems supporting the #! notation invoke interpreters this way.)
91.Ip 3. 4 2
378cc40b 92Passed in implicitly via standard input.
93This only works if there are no filename arguments\*(--to pass
94arguments to a stdin script you must explicitly specify a - for the script name.
8d063cd8 95.PP
96After locating your script,
97.I perl
98compiles it to an internal form.
99If the script is syntactically correct, it is executed.
100.Sh "Options"
83b4785a 101Note: on first reading this section may not make much sense to you. It's here
8d063cd8 102at the front for easy reference.
103.PP
104A single-character option may be combined with the following option, if any.
105This is particularly useful when invoking a script using the #! construct which
106only allows one argument. Example:
107.nf
108
109.ne 2
378cc40b 110 #!/usr/bin/perl -spi.bak # same as -s -p -i.bak
8d063cd8 111 .\|.\|.
112
113.fi
114Options include:
115.TP 5
378cc40b 116.B \-a
117turns on autosplit mode when used with a \-n or \-p.
118An implicit split command to the @F array
119is done as the first thing inside the implicit while loop produced by
120the \-n or \-p.
121.nf
122
123 perl -ane 'print pop(@F),"\en";'
124
125is equivalent to
126
127 while (<>) {
128 @F = split(' ');
129 print pop(@F),"\en";
130 }
131
132.fi
133.TP 5
8d063cd8 134.B \-D<number>
135sets debugging flags.
136To watch how it executes your script, use
137.B \-D14.
138(This only works if debugging is compiled into your
139.IR perl .)
140.TP 5
141.B \-e commandline
142may be used to enter one line of script.
143Multiple
144.B \-e
145commands may be given to build up a multi-line script.
146If
147.B \-e
148is given,
149.I perl
150will not look for a script filename in the argument list.
151.TP 5
152.B \-i<extension>
153specifies that files processed by the <> construct are to be edited
154in-place.
155It does this by renaming the input file, opening the output file by the
156same name, and selecting that output file as the default for print statements.
157The extension, if supplied, is added to the name of the
158old file to make a backup copy.
159If no extension is supplied, no backup is made.
378cc40b 160Saying \*(L"perl -p -i.bak -e "s/foo/bar/;" .\|.\|. \*(R" is the same as using
8d063cd8 161the script:
162.nf
163
164.ne 2
378cc40b 165 #!/usr/bin/perl -pi.bak
8d063cd8 166 s/foo/bar/;
167
168which is equivalent to
169
170.ne 14
378cc40b 171 #!/usr/bin/perl
8d063cd8 172 while (<>) {
173 if ($ARGV ne $oldargv) {
174 rename($ARGV,$ARGV . '.bak');
175 open(ARGVOUT,">$ARGV");
176 select(ARGVOUT);
177 $oldargv = $ARGV;
178 }
179 s/foo/bar/;
180 }
181 continue {
182 print; # this prints to original filename
183 }
184 select(stdout);
185
186.fi
187except that the \-i form doesn't need to compare $ARGV to $oldargv to know when
188the filename has changed.
189It does, however, use ARGVOUT for the selected filehandle.
190Note that stdout is restored as the default output filehandle after the loop.
378cc40b 191.Sp
192You can use eof to locate the end of each input file, in case you want
193to append to each file, or reset line numbering (see example under eof).
8d063cd8 194.TP 5
195.B \-I<directory>
196may be used in conjunction with
197.B \-P
198to tell the C preprocessor where to look for include files.
199By default /usr/include and /usr/lib/perl are searched.
200.TP 5
201.B \-n
202causes
203.I perl
204to assume the following loop around your script, which makes it iterate
205over filename arguments somewhat like \*(L"sed -n\*(R" or \fIawk\fR:
206.nf
207
208.ne 3
209 while (<>) {
378cc40b 210 .\|.\|. # your script goes here
8d063cd8 211 }
212
213.fi
214Note that the lines are not printed by default.
215See
216.B \-p
217to have lines printed.
378cc40b 218Here is an efficient way to delete all files older than a week:
219.nf
220
221 find . -mtime +7 -print | perl -ne 'chop;unlink;'
222
223.fi
224This is faster than using the -exec switch of find because you don't have to
225start a process on every filename found.
8d063cd8 226.TP 5
227.B \-p
228causes
229.I perl
230to assume the following loop around your script, which makes it iterate
231over filename arguments somewhat like \fIsed\fR:
232.nf
233
234.ne 5
235 while (<>) {
378cc40b 236 .\|.\|. # your script goes here
8d063cd8 237 } continue {
238 print;
239 }
240
241.fi
242Note that the lines are printed automatically.
243To suppress printing use the
244.B \-n
245switch.
83b4785a 246A
247.B \-p
248overrides a
249.B \-n
250switch.
8d063cd8 251.TP 5
252.B \-P
253causes your script to be run through the C preprocessor before
254compilation by
255.IR perl .
256(Since both comments and cpp directives begin with the # character,
257you should avoid starting comments with any words recognized
258by the C preprocessor such as \*(L"if\*(R", \*(L"else\*(R" or \*(L"define\*(R".)
259.TP 5
260.B \-s
261enables some rudimentary switch parsing for switches on the command line
83b4785a 262after the script name but before any filename arguments (or before a --).
263Any switch found there is removed from @ARGV and sets the corresponding variable in the
8d063cd8 264.I perl
265script.
266The following script prints \*(L"true\*(R" if and only if the script is
83b4785a 267invoked with a -xyz switch.
8d063cd8 268.nf
269
270.ne 2
378cc40b 271 #!/usr/bin/perl -s
83b4785a 272 if ($xyz) { print "true\en"; }
8d063cd8 273
274.fi
378cc40b 275.TP 5
276.B \-S
277makes perl use the PATH environment variable to search for the script
278(unless the name of the script starts with a slash).
279Typically this is used to emulate #! startup on machines that don't
280support #!, in the following manner:
281.nf
282
283 #!/usr/bin/perl
284 eval "exec /usr/bin/perl -S $0 $*"
285 if $running_under_some_shell;
286
287.fi
288The system ignores the first line and feeds the script to /bin/sh,
289which proceeds to try to execute the perl script as a shell script.
290The shell executes the second line as a normal shell command, and thus
291starts up the perl interpreter.
292On some systems $0 doesn't always contain the full pathname,
293so the -S tells perl to search for the script if necessary.
294After perl locates the script, it parses the lines and ignores them because
295the variable $running_under_some_shell is never true.
296.TP 5
297.B \-U
298allows perl to do unsafe operations.
13281fa4 299Currently the only \*(L"unsafe\*(R" operation is the unlinking of directories while
378cc40b 300running as superuser.
301.TP 5
302.B \-v
303prints the version and patchlevel of your perl executable.
304.TP 5
305.B \-w
306prints warnings about identifiers that are mentioned only once, and scalar
307variables that are used before being set.
308Also warns about redefined subroutines, and references to undefined
309subroutines and filehandles.
8d063cd8 310.Sh "Data Types and Objects"
311.PP
378cc40b 312Perl has about two and a half data types: scalars, arrays of scalars, and
8d063cd8 313associative arrays.
378cc40b 314Scalars and arrays of scalars are first class objects, for the most part,
8d063cd8 315in the sense that they can be used as a whole as values in an expression.
316Associative arrays can only be accessed on an association by association basis;
317they don't have a value as a whole (at least not yet).
318.PP
378cc40b 319Scalars are interpreted as strings or numbers as appropriate.
320A scalar is interpreted as TRUE in the boolean sense if it is not the null
8d063cd8 321string or 0.
322Booleans returned by operators are 1 for true and '0' or '' (the null
323string) for false.
324.PP
378cc40b 325References to scalar variables always begin with \*(L'$\*(R', even when referring
326to a scalar that is part of an array.
8d063cd8 327Thus:
328.nf
329
330.ne 3
378cc40b 331 $days \h'|2i'# a simple scalar variable
8d063cd8 332 $days[28] \h'|2i'# 29th element of array @days
333 $days{'Feb'}\h'|2i'# one value from an associative array
378cc40b 334 $#days \h'|2i'# last index of array @days
8d063cd8 335
336but entire arrays are denoted by \*(L'@\*(R':
337
338 @days \h'|2i'# ($days[0], $days[1],\|.\|.\|. $days[n])
339
340.fi
341.PP
378cc40b 342Any of these five constructs may serve as an lvalue,
343that is, may be assigned to.
344(You may also use an assignment to one of these lvalues as an lvalue in
345certain contexts\*(--see s, tr and chop.)
346You may find the length of array @days by evaluating
8d063cd8 347\*(L"$#days\*(R", as in
348.IR csh .
378cc40b 349(Actually, it's not the length of the array, it's the subscript of the last element, since there is (ordinarily) a 0th element.)
350Assigning to $#days changes the length of the array.
351Shortening an array by this method does not actually destroy any values.
352Lengthening an array that was previously shortened recovers the values that
353were in those elements.
354You can also gain some measure of efficiency by preextending an array that
355is going to get big.
356(You can also extend an array by assigning to an element that is off the
357end of the array.
358This differs from assigning to $#whatever in that intervening values
359are set to null rather than recovered.)
360You can truncate an array down to nothing by assigning the null list () to
361it.
362The following are exactly equivalent:
363.nf
364
365 @whatever = ();
366 $#whatever = $[ \- 1;
367
368.fi
8d063cd8 369.PP
370Every data type has its own namespace.
378cc40b 371You can, without fear of conflict, use the same name for a scalar variable,
8d063cd8 372an array, an associative array, a filehandle, a subroutine name, and/or
373a label.
374Since variable and array references always start with \*(L'$\*(R'
375or \*(L'@\*(R', the \*(L"reserved\*(R" words aren't in fact reserved
376with respect to variable names.
377(They ARE reserved with respect to labels and filehandles, however, which
378cc40b 378don't have an initial special character.
379Hint: you could say open(LOG,'logfile') rather than open(log,'logfile').)
8d063cd8 380Case IS significant\*(--\*(L"FOO\*(R", \*(L"Foo\*(R" and \*(L"foo\*(R" are all
381different names.
382Names which start with a letter may also contain digits and underscores.
383Names which do not start with a letter are limited to one character,
384e.g. \*(L"$%\*(R" or \*(L"$$\*(R".
385(Many one character names have a predefined significance to
386.IR perl .
387More later.)
388.PP
389String literals are delimited by either single or double quotes.
390They work much like shell quotes:
391double-quoted string literals are subject to backslash and variable
392substitution; single-quoted strings are not.
393The usual backslash rules apply for making characters such as newline, tab, etc.
394You can also embed newlines directly in your strings, i.e. they can end on
395a different line than they begin.
396This is nice, but if you forget your trailing quote, the error will not be
397reported until perl finds another line containing the quote character, which
398may be much further on in the script.
378cc40b 399Variable substitution inside strings is limited (currently) to simple scalar variables.
8d063cd8 400The following code segment prints out \*(L"The price is $100.\*(R"
401.nf
402
403.ne 2
404 $Price = '$100';\h'|3.5i'# not interpreted
405 print "The price is $Price.\e\|n";\h'|3.5i'# interpreted
406
407.fi
83b4785a 408Note that you can put curly brackets around the identifier to delimit it
409from following alphanumerics.
8d063cd8 410.PP
411Array literals are denoted by separating individual values by commas, and
412enclosing the list in parentheses.
413In a context not requiring an array value, the value of the array literal
414is the value of the final element, as in the C comma operator.
415For example,
416.nf
417
83b4785a 418.ne 4
8d063cd8 419 @foo = ('cc', '\-E', $bar);
420
421assigns the entire array value to array foo, but
422
423 $foo = ('cc', '\-E', $bar);
424
425.fi
426assigns the value of variable bar to variable foo.
427Array lists may be assigned to if and only if each element of the list
428is an lvalue:
429.nf
430
431 ($a, $b, $c) = (1, 2, 3);
432
433 ($map{'red'}, $map{'blue'}, $map{'green'}) = (0x00f, 0x0f0, 0xf00);
434
435.fi
378cc40b 436Array assignment returns the number of elements assigned.
8d063cd8 437.PP
438Numeric literals are specified in any of the usual floating point or
439integer formats.
440.PP
441There are several other pseudo-literals that you should know about.
378cc40b 442If a string is enclosed by backticks (grave accents), it first undergoes
443variable substitution just like a double quoted string.
444It is then interpreted as a command, and the output of that command
445is the value of the pseudo-literal, like in a shell.
8d063cd8 446The command is executed each time the pseudo-literal is evaluated.
378cc40b 447The status value of the command is returned in $? (see Predefined Names
448for the interpretation of $?).
449Unlike in \f2csh\f1, no translation is done on the return
8d063cd8 450data\*(--newlines remain newlines.
378cc40b 451Unlike in any of the shells, single quotes do not hide variable names
452in the command from interpretation.
453To pass a $ through to the shell you need to hide it with a backslash.
8d063cd8 454.PP
455Evaluating a filehandle in angle brackets yields the next line
456from that file (newline included, so it's never false until EOF).
457Ordinarily you must assign that value to a variable,
458but there is one situation in which an automatic assignment happens.
459If (and only if) the input symbol is the only thing inside the conditional of a
460.I while
461loop, the value is
462automatically assigned to the variable \*(L"$_\*(R".
463(This may seem like an odd thing to you, but you'll use the construct
464in almost every
465.I perl
466script you write.)
467Anyway, the following lines are equivalent to each other:
468.nf
469
470.ne 3
471 while ($_ = <stdin>) {
472 while (<stdin>) {
473 for (\|;\|<stdin>;\|) {
474
475.fi
476The filehandles
477.IR stdin ,
478.I stdout
479and
480.I stderr
481are predefined.
482Additional filehandles may be created with the
483.I open
484function.
485.PP
378cc40b 486If a <FILEHANDLE> is used in a context that is looking for an array, an array
487consisting of all the input lines is returned, one line per array element.
488It's easy to make a LARGE data space this way, so use with care.
489.PP
8d063cd8 490The null filehandle <> is special and can be used to emulate the behavior of
491\fIsed\fR and \fIawk\fR.
492Input from <> comes either from standard input, or from each file listed on
493the command line.
494Here's how it works: the first time <> is evaluated, the ARGV array is checked,
495and if it is null, $ARGV[0] is set to '-', which when opened gives you standard
496input.
497The ARGV array is then processed as a list of filenames.
498The loop
499.nf
500
501.ne 3
502 while (<>) {
503 .\|.\|. # code for each line
504 }
505
506.ne 10
507is equivalent to
508
509 unshift(@ARGV, '\-') \|if \|$#ARGV < $[;
510 while ($ARGV = shift) {
511 open(ARGV, $ARGV);
512 while (<ARGV>) {
513 .\|.\|. # code for each line
514 }
515 }
516
517.fi
518except that it isn't as cumbersome to say.
519It really does shift array ARGV and put the current filename into
520variable ARGV.
521It also uses filehandle ARGV internally.
522You can modify @ARGV before the first <> as long as you leave the first
523filename at the beginning of the array.
83b4785a 524Line numbers ($.) continue as if the input was one big happy file.
378cc40b 525(But see example under eof for how to reset line numbers on each file.)
8d063cd8 526.PP
83b4785a 527.ne 5
378cc40b 528If you want to set @ARGV to your own list of files, go right ahead.
8d063cd8 529If you want to pass switches into your script, you can
530put a loop on the front like this:
531.nf
532
533.ne 10
534 while ($_ = $ARGV[0], /\|^\-/\|) {
535 shift;
536 last if /\|^\-\|\-$\|/\|;
537 /\|^\-D\|(.*\|)/ \|&& \|($debug = $1);
538 /\|^\-v\|/ \|&& \|$verbose++;
539 .\|.\|. # other switches
540 }
541 while (<>) {
542 .\|.\|. # code for each line
543 }
544
545.fi
546The <> symbol will return FALSE only once.
547If you call it again after this it will assume you are processing another
548@ARGV list, and if you haven't set @ARGV, will input from stdin.
378cc40b 549.PP
550If the string inside the angle brackets is a reference to a scalar variable
551(e.g. <$foo>),
552then that variable contains the name of the filehandle to input from.
553.PP
554If the string inside angle brackets is not a filehandle, it is interpreted
555as a filename pattern to be globbed, and either an array of filenames or the
556next filename in the list is returned, depending on context.
557One level of $ interpretation is done first, but you can't say <$foo>
558because that's an indirect filehandle as explained in the previous
559paragraph.
560You could insert curly brackets to force interpretation as a
561filename glob: <${foo}>.
562Example:
563.nf
564
565.ne 3
566 while (<*.c>) {
567 chmod 0644,$_;
568 }
569
570is equivalent to
571
572.ne 5
573 open(foo,"echo *.c | tr -s ' \et\er\ef' '\e\e012\e\e012\e\e012\e\e012'|");
574 while (<foo>) {
575 chop;
576 chmod 0644,$_;
577 }
578
579.fi
580In fact, it's currently implemented that way.
581(Which means it will not work on filenames with spaces in them.)
582Of course, the shortest way to do the above is:
583.nf
584
585 chmod 0644,<*.c>;
586
587.fi
8d063cd8 588.Sh "Syntax"
589.PP
590A
591.I perl
592script consists of a sequence of declarations and commands.
593The only things that need to be declared in
594.I perl
595are report formats and subroutines.
596See the sections below for more information on those declarations.
597All objects are assumed to start with a null or 0 value.
598The sequence of commands is executed just once, unlike in
599.I sed
600and
601.I awk
602scripts, where the sequence of commands is executed for each input line.
603While this means that you must explicitly loop over the lines of your input file
604(or files), it also means you have much more control over which files and which
605lines you look at.
606(Actually, I'm lying\*(--it is possible to do an implicit loop with either the
607.B \-n
608or
609.B \-p
610switch.)
611.PP
612A declaration can be put anywhere a command can, but has no effect on the
613execution of the primary sequence of commands.
614Typically all the declarations are put at the beginning or the end of the script.
615.PP
616.I Perl
617is, for the most part, a free-form language.
618(The only exception to this is format declarations, for fairly obvious reasons.)
619Comments are indicated by the # character, and extend to the end of the line.
620If you attempt to use /* */ C comments, it will be interpreted either as
621division or pattern matching, depending on the context.
622So don't do that.
623.Sh "Compound statements"
624In
625.IR perl ,
626a sequence of commands may be treated as one command by enclosing it
627in curly brackets.
628We will call this a BLOCK.
629.PP
630The following compound commands may be used to control flow:
631.nf
632
633.ne 4
634 if (EXPR) BLOCK
635 if (EXPR) BLOCK else BLOCK
378cc40b 636 if (EXPR) BLOCK elsif (EXPR) BLOCK .\|.\|. else BLOCK
8d063cd8 637 LABEL while (EXPR) BLOCK
638 LABEL while (EXPR) BLOCK continue BLOCK
639 LABEL for (EXPR; EXPR; EXPR) BLOCK
378cc40b 640 LABEL foreach VAR (ARRAY) BLOCK
8d063cd8 641 LABEL BLOCK continue BLOCK
642
643.fi
83b4785a 644Note that, unlike C and Pascal, these are defined in terms of BLOCKs, not
8d063cd8 645statements.
646This means that the curly brackets are \fIrequired\fR\*(--no dangling statements allowed.
647If you want to write conditionals without curly brackets there are several
648other ways to do it.
649The following all do the same thing:
650.nf
651
652.ne 5
653 if (!open(foo)) { die "Can't open $foo"; }
654 die "Can't open $foo" unless open(foo);
655 open(foo) || die "Can't open $foo"; # foo or bust!
656 open(foo) ? 'hi mom' : die "Can't open $foo";
83b4785a 657 # a bit exotic, that last one
8d063cd8 658
659.fi
8d063cd8 660.PP
661The
662.I if
663statement is straightforward.
664Since BLOCKs are always bounded by curly brackets, there is never any
665ambiguity about which
666.I if
667an
668.I else
669goes with.
670If you use
671.I unless
672in place of
673.IR if ,
674the sense of the test is reversed.
675.PP
676The
677.I while
678statement executes the block as long as the expression is true
679(does not evaluate to the null string or 0).
680The LABEL is optional, and if present, consists of an identifier followed by
681a colon.
682The LABEL identifies the loop for the loop control statements
683.IR next ,
684.I last
685and
686.I redo
687(see below).
688If there is a
689.I continue
690BLOCK, it is always executed just before
691the conditional is about to be evaluated again, similarly to the third part
692of a
693.I for
694loop in C.
695Thus it can be used to increment a loop variable, even when the loop has
696been continued via the
697.I next
698statement (similar to the C \*(L"continue\*(R" statement).
699.PP
700If the word
701.I while
702is replaced by the word
703.IR until ,
704the sense of the test is reversed, but the conditional is still tested before
705the first iteration.
706.PP
707In either the
708.I if
709or the
710.I while
711statement, you may replace \*(L"(EXPR)\*(R" with a BLOCK, and the conditional
712is true if the value of the last command in that block is true.
713.PP
714The
715.I for
716loop works exactly like the corresponding
717.I while
718loop:
719.nf
720
721.ne 12
722 for ($i = 1; $i < 10; $i++) {
723 .\|.\|.
724 }
725
726is the same as
727
728 $i = 1;
729 while ($i < 10) {
730 .\|.\|.
731 } continue {
732 $i++;
733 }
734.fi
735.PP
378cc40b 736The foreach loop iterates over a normal array value and sets the variable
737VAR to be each element of the array in turn.
13281fa4 738The \*(L"foreach\*(R" keyword is actually identical to the \*(L"for\*(R" keyword,
739so you can use \*(L"foreach\*(R" for readability or \*(L"for\*(R" for brevity.
378cc40b 740If VAR is omitted, $_ is set to each value.
741If ARRAY is an actual array (as opposed to an expression returning an array
742value), you can modify each element of the array
743by modifying VAR inside the loop.
744Examples:
745.nf
746
747.ne 5
748 for (@ary) { s/foo/bar/; }
749
750 foreach $elem (@elements) {
751 $elem *= 2;
752 }
753
754 for ((10,9,8,7,6,5,4,3,2,1,'BOOM')) {
755 print $_,"\en"; sleep(1);
756 }
757
758.ne 3
759 foreach $item (split(/:[\e\e\en:]*/,$ENV{'TERMCAP'})) {
760 print "Item: $item\en";
761 }
762.fi
763.PP
8d063cd8 764The BLOCK by itself (labeled or not) is equivalent to a loop that executes
765once.
766Thus you can use any of the loop control statements in it to leave or
767restart the block.
768The
769.I continue
770block is optional.
771This construct is particularly nice for doing case structures.
772.nf
773
774.ne 6
775 foo: {
776 if (/abc/) { $abc = 1; last foo; }
777 if (/def/) { $def = 1; last foo; }
778 if (/xyz/) { $xyz = 1; last foo; }
779 $nothing = 1;
780 }
781
782.fi
378cc40b 783It's also nice for exiting subroutines early.
784Note the double curly brackets:
785.nf
786
787.ne 8
788 sub tokenize {{
789 .\|.\|.
790 if (/foo/) {
791 23; # return value
792 last;
793 }
794 .\|.\|.
795 }}
796
797.fi
8d063cd8 798.Sh "Simple statements"
799The only kind of simple statement is an expression evaluated for its side
800effects.
801Every expression (simple statement) must be terminated with a semicolon.
802Note that this is like C, but unlike Pascal (and
803.IR awk ).
804.PP
805Any simple statement may optionally be followed by a
806single modifier, just before the terminating semicolon.
807The possible modifiers are:
808.nf
809
810.ne 4
811 if EXPR
812 unless EXPR
813 while EXPR
814 until EXPR
815
816.fi
817The
818.I if
819and
820.I unless
821modifiers have the expected semantics.
822The
823.I while
824and
378cc40b 825.I until
8d063cd8 826modifiers also have the expected semantics (conditional evaluated first),
827except when applied to a do-BLOCK command,
828in which case the block executes once before the conditional is evaluated.
829This is so that you can write loops like:
830.nf
831
832.ne 4
833 do {
834 $_ = <stdin>;
835 .\|.\|.
836 } until $_ \|eq \|".\|\e\|n";
837
838.fi
839(See the
840.I do
841operator below. Note also that the loop control commands described later will
83b4785a 842NOT work in this construct, since modifiers don't take loop labels.
8d063cd8 843Sorry.)
844.Sh "Expressions"
845Since
846.I perl
847expressions work almost exactly like C expressions, only the differences
848will be mentioned here.
849.PP
850Here's what
851.I perl
852has that C doesn't:
853.Ip (\|) 8 3
854The null list, used to initialize an array to null.
855.Ip . 8
856Concatenation of two strings.
857.Ip .= 8
858The corresponding assignment operator.
859.Ip eq 8
860String equality (== is numeric equality).
861For a mnemonic just think of \*(L"eq\*(R" as a string.
862(If you are used to the
863.I awk
864behavior of using == for either string or numeric equality
865based on the current form of the comparands, beware!
866You must be explicit here.)
867.Ip ne 8
868String inequality (!= is numeric inequality).
869.Ip lt 8
870String less than.
871.Ip gt 8
872String greater than.
873.Ip le 8
874String less than or equal.
875.Ip ge 8
876String greater than or equal.
877.Ip =~ 8 2
878Certain operations search or modify the string \*(L"$_\*(R" by default.
879This operator makes that kind of operation work on some other string.
880The right argument is a search pattern, substitution, or translation.
881The left argument is what is supposed to be searched, substituted, or
882translated instead of the default \*(L"$_\*(R".
883The return value indicates the success of the operation.
884(If the right argument is an expression other than a search pattern,
885substitution, or translation, it is interpreted as a search pattern
886at run time.
887This is less efficient than an explicit search, since the pattern must
888be compiled every time the expression is evaluated.)
889The precedence of this operator is lower than unary minus and autoincrement/decrement, but higher than everything else.
890.Ip !~ 8
891Just like =~ except the return value is negated.
892.Ip x 8
893The repetition operator.
894Returns a string consisting of the left operand repeated the
895number of times specified by the right operand.
896.nf
897
898 print '-' x 80; # print row of dashes
899 print '-' x80; # illegal, x80 is identifier
900
901 print "\et" x ($tab/8), ' ' x ($tab%8); # tab over
902
903.fi
904.Ip x= 8
905The corresponding assignment operator.
906.Ip .. 8
907The range operator, which is bistable.
378cc40b 908Each .. operator maintains its own boolean state.
909It is false as long as its left operand is false.
910Once the left operand is true, the range operator stays true
911until the right operand is true,
912AFTER which the range operator becomes false again.
913(It doesn't become false till the next time the range operator is evaluated.
8d063cd8 914It can become false on the same evaluation it became true, but it still returns
915true once.)
13281fa4 916The right operand is not evaluated while the operator is in the \*(L"false\*(R" state,
917and the left operand is not evaluated while the operator is in the \*(L"true\*(R" state.
8d063cd8 918The .. operator is primarily intended for doing line number ranges after
919the fashion of \fIsed\fR or \fIawk\fR.
920The precedence is a little lower than || and &&.
921The value returned is either the null string for false, or a sequence number
922(beginning with 1) for true.
923The sequence number is reset for each range encountered.
924The final sequence number in a range has the string 'E0' appended to it, which
925doesn't affect its numeric value, but gives you something to search for if you
926want to exclude the endpoint.
927You can exclude the beginning point by waiting for the sequence number to be
928greater than 1.
378cc40b 929If either operand of .. is static, that operand is implicitly compared to
8d063cd8 930the $. variable, the current line number.
931Examples:
932.nf
933
934.ne 5
935 if (101 .. 200) { print; } # print 2nd hundred lines
936
937 next line if (1 .. /^$/); # skip header lines
938
939 s/^/> / if (/^$/ .. eof()); # quote body
940
941.fi
378cc40b 942.Ip \-x 8
943A file test.
944This unary operator takes one argument, either a filename or a filehandle,
945and tests the associated file to see if something is true about it.
946If the argument is omitted, tests $_, except for \-t, which tests stdin.
947It returns 1 for true and '' for false.
948Precedence is higher than logical and relational operators, but lower than
949arithmetic operators.
950The operator may be any of:
951.nf
952 \-r File is readable by effective uid.
953 \-w File is writeable by effective uid.
954 \-x File is executable by effective uid.
955 \-o File is owned by effective uid.
956 \-R File is readable by real uid.
957 \-W File is writeable by real uid.
958 \-X File is executable by real uid.
959 \-O File is owned by real uid.
960 \-e File exists.
961 \-z File has zero size.
962 \-s File has non-zero size.
963 \-f File is a plain file.
964 \-d File is a directory.
965 \-l File is a symbolic link.
966 \-p File is a named pipe (FIFO).
967 \-S File is a socket.
968 \-b File is a block special file.
969 \-c File is a character special file.
970 \-u File has setuid bit set.
971 \-g File has setgid bit set.
972 \-k File has sticky bit set.
973 \-t Filehandle is opened to a tty.
974 \-T File is a text file.
975 \-B File is a binary file (opposite of \-T).
976
977.fi
978The interpretation of the file permission operators \-r, \-R, \-w, \-W, \-x and \-X
979is based solely on the mode of the file and the uids and gids of the user.
980There may be other reasons you can't actually read, write or execute the file.
981Also note that, for the superuser, \-r, \-R, \-w and \-W always return 1, and
982\-x and \-X return 1 if any execute bit is set in the mode.
983Scripts run by the superuser may thus need to do a stat() in order to determine
984the actual mode of the file, or temporarily set the uid to something else.
985.Sp
986Example:
987.nf
988.ne 7
989
990 while (<>) {
991 chop;
992 next unless \-f $_; # ignore specials
993 .\|.\|.
994 }
995
996.fi
997Note that -s/a/b/ does not do a negated substitution.
998Saying -exp($foo) still works as expected, however\*(--only single letters
999following a minus are interpreted as file tests.
1000.Sp
1001The \-T and \-B switches work as follows.
1002The first block or so of the file is examined for odd characters such as
1003strange control codes or metacharacters.
1004If too many odd characters (>10%) are found, it's a \-B file, otherwise it's a \-T file.
1005Also, any file containing null in the first block is considered a binary file.
1006If \-T or \-B is used on a filehandle, the current stdio buffer is examined
1007rather than the first block.
1008Since input doesn't work well on binary files you should probably test a
1009filehandle before doing any input if you're unsure of the nature of the
1010filehandle you've been handed (usually via stdin).
1011Both \-T and \-B return TRUE on a null file, or a file at EOF when testing
1012a filehandle.
8d063cd8 1013.PP
1014Here is what C has that
1015.I perl
1016doesn't:
1017.Ip "unary &" 12
1018Address-of operator.
1019.Ip "unary *" 12
1020Dereference-address operator.
378cc40b 1021.Ip "(TYPE)" 12
1022Type casting operator.
8d063cd8 1023.PP
1024Like C,
1025.I perl
1026does a certain amount of expression evaluation at compile time, whenever
1027it determines that all of the arguments to an operator are static and have
1028no side effects.
1029In particular, string concatenation happens at compile time between literals that don't do variable substitution.
1030Backslash interpretation also happens at compile time.
1031You can say
1032.nf
1033
1034.ne 2
1035 'Now is the time for all' . "\|\e\|n" .
1036 'good men to come to.'
1037
1038.fi
1039and this all reduces to one string internally.
1040.PP
378cc40b 1041The autoincrement operator has a little extra built-in magic to it.
1042If you increment a variable that is numeric, or that has ever been used in
1043a numeric context, you get a normal increment.
1044If, however, the variable has only been used in string contexts since it
1045was set, and has a value that is not null and matches the
1046pattern /^[a-zA-Z]*[0-9]*$/, the increment is done
1047as a string, preserving each character within its range, with carry:
1048.nf
1049
1050 print ++($foo = '99'); # prints '100'
1051 print ++($foo = 'a0'); # prints 'a1'
1052 print ++($foo = 'Az'); # prints 'Ba'
1053 print ++($foo = 'zz'); # prints 'aaa'
1054
1055.fi
1056The autodecrement is not magical.
1057.PP
8d063cd8 1058Along with the literals and variables mentioned earlier,
378cc40b 1059the following operations can serve as terms in an expression.
1060Some of these operations take a LIST as an argument.
1061Such a list can consist of any combination of scalar arguments or arrays;
1062the arrays will be included in the list as if each individual element were
1063interpolated at that point in the list.
13281fa4 1064Elements of the LIST should be separated by commas.
378cc40b 1065.Ip "/PATTERN/i" 8 4
8d063cd8 1066Searches a string for a pattern, and returns true (1) or false ('').
1067If no string is specified via the =~ or !~ operator,
1068the $_ string is searched.
1069(The string specified with =~ need not be an lvalue\*(--it may be the result of an expression evaluation, but remember the =~ binds rather tightly.)
1070See also the section on regular expressions.
1071.Sp
1072If you prepend an `m' you can use any pair of characters as delimiters.
1073This is particularly useful for matching Unix path names that contain `/'.
378cc40b 1074If the final delimiter is followed by the optional letter `i', the matching is
1075done in a case-insensitive manner.
1076.Sp
1077If used in a context that requires an array value, a pattern match returns an
1078array consisting of the subexpressions matched by the parens in pattern,
1079i.e. ($1, $2, $3.\|.\|.).
8d063cd8 1080.Sp
1081Examples:
1082.nf
1083
1084.ne 4
1085 open(tty, '/dev/tty');
378cc40b 1086 <tty> \|=~ \|/\|^y\|/i \|&& \|do foo(\|); # do foo if desired
8d063cd8 1087
1088 if (/Version: \|*\|([0-9.]*\|)\|/\|) { $version = $1; }
1089
1090 next if m#^/usr/spool/uucp#;
1091
378cc40b 1092 if (($F1,$F2,$Etc) = ($foo =~ /^(\eS+)\es+(\eS+)\es*(.*)/))
1093
8d063cd8 1094.fi
378cc40b 1095This last example splits $foo into the first two words and the remainder
1096of the line, and assigns those three fields to $F1, $F2 and $Etc.
1097The conditional is true if any variables were assigned, i.e. if the pattern
1098matched.
8d063cd8 1099.Ip "?PATTERN?" 8 4
1100This is just like the /pattern/ search, except that it matches only once between
1101calls to the
1102.I reset
1103operator.
1104This is a useful optimization when you only want to see the first occurrence of
378cc40b 1105something in each file of a set of files, for instance.
8d063cd8 1106.Ip "chdir EXPR" 8 2
378cc40b 1107Changes the working directory to EXPR, if possible.
8d063cd8 1108Returns 1 upon success, 0 otherwise.
1109See example under die().
1110.Ip "chmod LIST" 8 2
1111Changes the permissions of a list of files.
1112The first element of the list must be the numerical mode.
8d063cd8 1113Returns the number of files successfully changed.
8d063cd8 1114.nf
1115
378cc40b 1116.ne 2
1117 $cnt = chmod 0755,'foo','bar';
1118 chmod 0755,@executables;
8d063cd8 1119
1120.fi
1121.Ip "chop(VARIABLE)" 8 5
1122.Ip "chop" 8
1123Chops off the last character of a string and returns it.
1124It's used primarily to remove the newline from the end of an input record,
1125but is much more efficient than s/\en// because it neither scans nor copies
1126the string.
1127If VARIABLE is omitted, chops $_.
1128Example:
1129.nf
1130
1131.ne 5
1132 while (<>) {
1133 chop; # avoid \en on last field
1134 @array = split(/:/);
1135 .\|.\|.
1136 }
1137
1138.fi
378cc40b 1139You can actually chop anything that's an lvalue, including an assignment:
1140.nf
1141
1142 chop($cwd = `pwd`);
1143
1144.fi
8d063cd8 1145.Ip "chown LIST" 8 2
1146Changes the owner (and group) of a list of files.
378cc40b 1147The first two elements of the list must be the NUMERICAL uid and gid,
1148in that order.
8d063cd8 1149Returns the number of files successfully changed.
8d063cd8 1150.nf
1151
378cc40b 1152.ne 2
1153 $cnt = chown $uid,$gid,'foo','bar';
1154 chown $uid,$gid,@filenames;
8d063cd8 1155
1156.fi
378cc40b 1157.ne 23
8d063cd8 1158Here's an example of looking up non-numeric uids:
1159.nf
1160
8d063cd8 1161 print "User: ";
1162 $user = <stdin>;
378cc40b 1163 chop($user);
1164 print "Files: ";
1165 $pattern = <stdin>;
1166 chop($pattern);
8d063cd8 1167 open(pass,'/etc/passwd') || die "Can't open passwd";
1168 while (<pass>) {
1169 ($login,$pass,$uid,$gid) = split(/:/);
1170 $uid{$login} = $uid;
1171 $gid{$login} = $gid;
1172 }
378cc40b 1173 @ary = <$pattern>; # get filenames
8d063cd8 1174 if ($uid{$user} eq '') {
1175 die "$user not in passwd file";
1176 }
1177 else {
1178 unshift(@ary,$uid{$user},$gid{$user});
1179 chown @ary;
1180 }
1181
1182.fi
1183.Ip "close(FILEHANDLE)" 8 5
1184.Ip "close FILEHANDLE" 8
1185Closes the file or pipe associated with the file handle.
1186You don't have to close FILEHANDLE if you are immediately going to
1187do another open on it, since open will close it for you.
1188(See
1189.IR open .)
1190However, an explicit close on an input file resets the line counter ($.), while
1191the implicit close done by
1192.I open
1193does not.
1194Also, closing a pipe will wait for the process executing on the pipe to complete,
1195in case you want to look at the output of the pipe afterwards.
1196Example:
1197.nf
1198
1199.ne 4
1200 open(output,'|sort >foo'); # pipe to sort
378cc40b 1201 .\|.\|. # print stuff to output
8d063cd8 1202 close(output); # wait for sort to finish
1203 open(input,'foo'); # get sort's results
1204
1205.fi
378cc40b 1206FILEHANDLE may be an expression whose value gives the real filehandle name.
8d063cd8 1207.Ip "crypt(PLAINTEXT,SALT)" 8 6
1208Encrypts a string exactly like the crypt() function in the C library.
1209Useful for checking the password file for lousy passwords.
1210Only the guys wearing white hats should do this.
378cc40b 1211.Ip "delete $ASSOC{KEY}" 8 6
1212Deletes the specified value from the specified associative array.
1213Returns the deleted value.
1214The following deletes all the values of an associative array:
1215.nf
1216
1217.ne 3
1218 foreach $key (keys(ARRAY)) {
1219 delete $ARRAY{$key};
1220 }
1221
1222.fi
1223(But it would be faster to use the reset command.)
8d063cd8 1224.Ip "die EXPR" 8 6
378cc40b 1225Prints the value of EXPR to stderr and exits with the current value of $!
1226(errno).
1227If $! is 0, exits with the value of ($? >> 8) (`command` status).
1228If ($? >> 8) is 0, exits with 255.
8d063cd8 1229Equivalent examples:
1230.nf
1231
1232.ne 3
378cc40b 1233 die "Can't cd to spool.\en" unless chdir '/usr/spool/news';
1234
1235 chdir '/usr/spool/news' || die "Can't cd to spool.\en";
1236
1237.fi
1238.Sp
1239If the value of EXPR does not end in a newline, the current script line
1240number and input line number (if any) are also printed, and a newline is
1241supplied.
13281fa4 1242Hint: sometimes appending \*(L", stopped\*(R" to your message will cause it to make
1243better sense when the string \*(L"at foo line 123\*(R" is appended.
1244Suppose you are running script \*(L"canasta\*(R".
378cc40b 1245.nf
1246
1247.ne 7
1248 die "/etc/games is no good";
1249 die "/etc/games is no good, stopped";
1250
1251produce, respectively
8d063cd8 1252
378cc40b 1253 /etc/games is no good at canasta line 123.
1254 /etc/games is no good, stopped at canasta line 123.
8d063cd8 1255
1256.fi
8d063cd8 1257See also
1258.IR exit .
1259.Ip "do BLOCK" 8 4
1260Returns the value of the last command in the sequence of commands indicated
1261by BLOCK.
1262When modified by a loop modifier, executes the BLOCK once before testing the
1263loop condition.
1264(On other statements the loop modifiers test the conditional first.)
1265.Ip "do SUBROUTINE (LIST)" 8 3
1266Executes a SUBROUTINE declared by a
1267.I sub
1268declaration, and returns the value
1269of the last expression evaluated in SUBROUTINE.
378cc40b 1270If you pass arrays as part of LIST you may wish to pass the length
1271of the array in front of each array.
8d063cd8 1272(See the section on subroutines later on.)
378cc40b 1273SUBROUTINE may be a scalar variable, in which case the variable contains
1274the name of the subroutine to execute.
13281fa4 1275The parentheses are required to avoid confusion with the next form of \*(L"do\*(R".
378cc40b 1276.Ip "do EXPR" 8 3
1277Uses the value of EXPR as a filename and executes the contents of the file
1278as a perl script.
1279Its primary use is to include subroutines from a perl subroutine library.
1280.nf
1281 do 'stat.pl';
1282
1283is just like
1284
1285 eval `cat stat.pl`;
1286
1287.fi
1288except that it's more efficient, more concise, keeps track of the current
1289filename for error messages, and searches all the -I libraries if the file
1290isn't in the current directory (see also the @INC array in Predefined Names).
1291It's the same, however, in that it does reparse the file every time you
1292call it, so if you are going to use the file inside a loop you might prefer
1293to use #include, at the expense of a little more startup time.
1294(The main problem with #include is that cpp doesn't grok # comments\*(--a
13281fa4 1295workaround is to use \*(L";#\*(R" for standalone comments.)
378cc40b 1296Note that the following are NOT equivalent:
1297.nf
1298
1299.ne 2
1300 do $foo; # eval a file
1301 do $foo(); # call a subroutine
1302
1303.fi
8d063cd8 1304.Ip "each(ASSOC_ARRAY)" 8 6
1305Returns a 2-element array consisting of the key and value for the next
1306value of an associative array, so that you can iterate over it.
1307Entries are returned in an apparently random order.
1308When the array is entirely read, a null array is returned (which when
1309assigned produces a FALSE (0) value).
1310The next call to each() after that will start iterating again.
1311The iterator can be reset only by reading all the elements from the array.
378cc40b 1312You must not modify the array while iterating over it.
1313There is a single iterator for each associative array, shared by all
1314each(), keys() and values() function calls in the program.
8d063cd8 1315The following prints out your environment like the printenv program, only
1316in a different order:
1317.nf
1318
1319.ne 3
1320 while (($key,$value) = each(ENV)) {
1321 print "$key=$value\en";
1322 }
1323
1324.fi
1325See also keys() and values().
1326.Ip "eof(FILEHANDLE)" 8 8
1327.Ip "eof" 8
1328Returns 1 if the next read on FILEHANDLE will return end of file, or if
1329FILEHANDLE is not open.
378cc40b 1330FILEHANDLE may be an expression whose value gives the real filehandle name.
1331An eof without an argument returns the eof status for the last file read.
1332Empty parentheses () may be used to indicate the pseudo file formed of the
8d063cd8 1333files listed on the command line, i.e. eof() is reasonable to use inside
378cc40b 1334a while (<>) loop to detect the end of only the last file.
1335Use eof(ARGV) or eof without the parens to test EACH file in a while (<>) loop.
1336Examples:
8d063cd8 1337.nf
1338
1339.ne 7
378cc40b 1340 # insert dashes just before last line of last file
8d063cd8 1341 while (<>) {
1342 if (eof()) {
1343 print "--------------\en";
1344 }
1345 print;
1346 }
1347
378cc40b 1348.ne 7
1349 # reset line numbering on each input file
1350 while (<>) {
1351 print "$.\et$_";
1352 if (eof) { # Not eof().
1353 close(ARGV);
1354 }
1355 }
1356
8d063cd8 1357.fi
83b4785a 1358.Ip "eval EXPR" 8 6
1359EXPR is parsed and executed as if it were a little perl program.
1360It is executed in the context of the current perl program, so that
1361any variable settings, subroutine or format definitions remain afterwards.
1362The value returned is the value of the last expression evaluated, just
1363as with subroutines.
1364If there is a syntax error or runtime error, a null string is returned by
1365eval, and $@ is set to the error message.
1366If there was no error, $@ is null.
378cc40b 1367If EXPR is omitted, evaluates $_.
8d063cd8 1368.Ip "exec LIST" 8 6
1369If there is more than one argument in LIST,
1370calls execvp() with the arguments in LIST.
1371If there is only one argument, the argument is checked for shell metacharacters.
1372If there are any, the entire argument is passed to /bin/sh -c for parsing.
1373If there are none, the argument is split into words and passed directly to
1374execvp(), which is more efficient.
1375Note: exec (and system) do not flush your output buffer, so you may need to
1376set $| to avoid lost output.
378cc40b 1377Examples:
1378.nf
1379
1380 exec '/bin/echo', 'Your arguments are: ', @ARGV;
1381 exec "sort $outfile | uniq";
1382
1383.fi
8d063cd8 1384.Ip "exit EXPR" 8 6
1385Evaluates EXPR and exits immediately with that value.
1386Example:
1387.nf
1388
1389.ne 2
1390 $ans = <stdin>;
1391 exit 0 \|if \|$ans \|=~ \|/\|^[Xx]\|/\|;
1392
1393.fi
1394See also
1395.IR die .
1396.Ip "exp(EXPR)" 8 3
1397Returns e to the power of EXPR.
1398.Ip "fork" 8 4
1399Does a fork() call.
1400Returns the child pid to the parent process and 0 to the child process.
1401Note: unflushed buffers remain unflushed in both processes, which means
1402you may need to set $| to avoid duplicate output.
1403.Ip "gmtime(EXPR)" 8 4
1404Converts a time as returned by the time function to a 9-element array with
1405the time analyzed for the Greenwich timezone.
1406Typically used as follows:
1407.nf
1408
1409.ne 3
1410 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)
1411 = gmtime(time);
1412
1413.fi
378cc40b 1414All array elements are numeric, and come straight out of a struct tm.
1415In particular this means that $mon has the range 0..11 and $wday has the
1416range 0..6.
8d063cd8 1417''' End of part 1