# converted to html commands.
#
-sub process_text1($$;$);
+sub process_text1($$;$$);
+sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
+sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
sub process_text {
return if $ignore;
$$tref = $res;
}
-sub process_text1($$;$){
- my( $lev, $rstr, $func ) = @_;
+sub process_text1($$;$$){
+ my( $lev, $rstr, $func, $closing ) = @_;
$lev++ unless defined $func;
my $res = '';
} elsif( $func eq 'C' ){
# C<code> - can be a ref or <CODE></CODE>
# need to extract text
- my $par = go_ahead( $rstr, 'C' );
+ my $par = go_ahead( $rstr, 'C', $closing );
## clean-up of the link target
my $text = depod( $par );
## L<text|cross-ref> => produce text, use cross-ref for linking
## L<cross-ref> => make text from cross-ref
## need to extract text
- my $par = go_ahead( $rstr, 'L' );
+ my $par = go_ahead( $rstr, 'L', $closing );
# some L<>'s that shouldn't be:
# a) full-blown URL's are emitted as-is
unless $$rstr =~ s/^>//;
} else {
- while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
+ my $term = pattern $closing;
+ while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
# all others: either recurse into new function or
- # terminate at closing angle bracket
+ # terminate at closing angle bracket(s)
my $pt = $1;
- $pt .= '>' if $2 eq '>' && $lev == 1;
+ $pt .= $2 if !$3 && $lev == 1;
$res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
- return $res if $2 eq '>' && $lev > 1;
- if( $2 ne '>' ){
- $res .= process_text1( $lev, $rstr, substr($2,0,1) );
+ return $res if !$3 && $lev > 1;
+ if( $3 ){
+ $res .= process_text1( $lev, $rstr, $3, closing $4 );
}
}
#
# go_ahead: extract text of an IS (can be nested)
#
-sub go_ahead($$){
- my( $rstr, $func ) = @_;
+sub go_ahead($$$){
+ my( $rstr, $func, $closing ) = @_;
my $res = '';
- my $level = 1;
- while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
+ my @closing = ($closing);
+ while( $$rstr =~
+ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
$res .= $1;
- if( $2 eq '>' ){
- return $res if --$level == 0;
+ unless( $3 ){
+ shift @closing;
+ return $res unless @closing;
} else {
- ++$level;
+ unshift @closing, closing $4;
}
$res .= $2;
}
$E2c{verbar} = '|';
$E2c{amp} = '&'; # in Tk's pods
-sub depod1($;$);
+sub depod1($;$$);
sub depod($){
my $string;
}
}
-sub depod1($;$){
- my( $rstr, $func ) = @_;
+sub depod1($;$$){
+ my( $rstr, $func, $closing ) = @_;
my $res = '';
return $res unless defined $$rstr;
if( ! defined( $func ) ){
# skip to next begin of an interior sequence
- while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<// ){
+ while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?// ){
# recurse into its text
- $res .= $1 . depod1( $rstr, $2 );
+ $res .= $1 . depod1( $rstr, $2, closing $3);
}
$res .= $$rstr;
} elsif( $func eq 'E' ){
} else {
# all others: either recurse into new function or
# terminate at closing angle bracket
- while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)// ){
+ my $term = pattern $closing;
+ while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)// ){
$res .= $1;
- last if $2 eq '>';
- $res .= depod1( $rstr, substr($2,0,1) );
+ last unless $3;
+ $res .= depod1( $rstr, $3, closing $4 );
}
## If we're here and $2 ne '>': undelimited interior sequence.
## Ignored, as this is called without proper indication of where we are.
The C<-T> and C<-B> switches work as follows. The first block or so of the
file is examined for odd characters such as strange control codes or
-characters with the high bit set. If too many strange characters (E<gt>30%)
+characters with the high bit set. If too many strange characters (>30%)
are found, it's a C<-B> file, otherwise it's a C<-T> file. Also, any file
containing null in the first block is considered a binary file. If C<-T>
or C<-B> is used on a filehandle, the current stdio buffer is examined
Be aware that the optimizer might have optimized call frames away before
C<caller> had a chance to get the information. That means that C<caller(N)>
might not return information about the call frame you expect it to, for
-C<N E<gt> 1>. In particular, C<@DB::args> might have information from the
+C<< N > 1 >>. In particular, C<@DB::args> might have information from the
previous time C<caller> was called.
=item chdir EXPR
Outside an C<eval>, prints the value of LIST to C<STDERR> and
exits with the current value of C<$!> (errno). If C<$!> is C<0>,
-exits with the value of C<($? E<gt>E<gt> 8)> (backtick `command`
-status). If C<($? E<gt>E<gt> 8)> is C<0>, exits with C<255>. Inside
+exits with the value of C<<< ($? >> 8) >>> (backtick `command`
+status). If C<<< ($? >> 8) >>> is C<0>, exits with C<255>. Inside
an C<eval()>, the error message is stuffed into C<$@> and the
C<eval> is terminated with the undefined value. This makes
C<die> the way to raise an exception.
An C<eof> without an argument uses the last file read. Using C<eof()>
with empty parentheses is very different. It refers to the pseudo file
formed from the files listed on the command line and accessed via the
-C<E<lt>E<gt>> operator. Since C<E<lt>E<gt>> isn't explicitly opened,
-as a normal filehandle is, an C<eof()> before C<E<lt>E<gt>> has been
+C<< <> >> operator. Since C<< <> >> isn't explicitly opened,
+as a normal filehandle is, an C<eof()> before C<< <> >> has been
used will cause C<@ARGV> to be examined to determine if input is
available.
-In a C<while (E<lt>E<gt>)> loop, C<eof> or C<eof(ARGV)> can be used to
+In a C<< while (<>) >> loop, C<eof> or C<eof(ARGV)> can be used to
detect the end of each file, C<eof()> will only detect the end of the
last file. Examples:
Although the deepest nested array or hash will not spring into existence
just because its existence was tested, any intervening ones will.
-Thus C<$ref-E<gt>{"A"}> and C<$ref-E<gt>{"A"}-E<gt>{"B"}> will spring
+Thus C<< $ref->{"A"} >> and C<< $ref->{"A"}->{"B"} >> will spring
into existence due to the existence test for the $key element above.
This happens anywhere the arrow operator is used, including even:
Returns the value of EXPR with filename expansions such as the
standard Unix shell F</bin/csh> would do. This is the internal function
-implementing the C<E<lt>*.cE<gt>> operator, but you can use it directly.
-If EXPR is omitted, C<$_> is used. The C<E<lt>*.cE<gt>> operator is
+implementing the C<< <*.c> >> operator, but you can use it directly.
+If EXPR is omitted, C<$_> is used. The C<< <*.c> >> operator is
discussed in more detail in L<perlop/"I/O Operators">.
Beginning with v5.6.0, this operator is implemented using the standard
to get the correct function definitions. If F<ioctl.ph> doesn't
exist or doesn't have the correct definitions you'll have to roll your
-own, based on your C header files such as F<E<lt>sys/ioctl.hE<gt>>.
+own, based on your C header files such as F<< <sys/ioctl.h> >>.
(There is a Perl script called B<h2ph> that comes with the Perl kit that
may help you in this, but it's nontrivial.) SCALAR will be read and/or
written depending on the FUNCTION--a pointer to the string value of SCALAR
to open.) See L<perlopentut> for a kinder, gentler explanation of opening
files.
-If MODE is C<'E<lt>'> or nothing, the file is opened for input.
-If MODE is C<'E<gt>'>, the file is truncated and opened for
-output, being created if necessary. If MODE is C<'E<gt>E<gt>'>,
+If MODE is C<< '<' >> or nothing, the file is opened for input.
+If MODE is C<< '>' >>, the file is truncated and opened for
+output, being created if necessary. If MODE is C<<< '>>' >>>,
the file is opened for appending, again being created if necessary.
-You can put a C<'+'> in front of the C<'E<gt>'> or C<'E<lt>'> to indicate that
-you want both read and write access to the file; thus C<'+E<lt>'> is almost
-always preferred for read/write updates--the C<'+E<gt>'> mode would clobber the
+You can put a C<'+'> in front of the C<< '>' >> or C<< '<' >> to indicate that
+you want both read and write access to the file; thus C<< '+<' >> is almost
+always preferred for read/write updates--the C<< '+>' >> mode would clobber the
file first. You can't usually use either read-write mode for updating
textfiles, since they have variable length records. See the B<-i>
switch in L<perlrun> for a better approach. The file is created with
permissions of C<0666> modified by the process' C<umask> value.
-These various prefixes correspond to the fopen(3) modes of C<'r'>, C<'r+'>, C<'w'>,
-C<'w+'>, C<'a'>, and C<'a+'>.
+These various prefixes correspond to the fopen(3) modes of C<'r'>, C<'r+'>,
+C<'w'>, C<'w+'>, C<'a'>, and C<'a+'>.
In the 2-arguments (and 1-argument) form of the call the mode and
filename should be concatenated (in this order), possibly separated by
-spaces. It is possible to omit the mode if the mode is C<'E<lt>'>.
+spaces. It is possible to omit the mode if the mode is C<< '<' >>.
If the filename begins with C<'|'>, the filename is interpreted as a
command to which output is to be piped, and if the filename ends with a
and L<perlipc/"Bidirectional Communication"> for alternatives.)
In the 2-arguments (and 1-argument) form opening C<'-'> opens STDIN
-and opening C<'E<gt>-'> opens STDOUT.
+and opening C<< '>-' >> opens STDOUT.
Open returns
nonzero upon success, the undefined value otherwise. If the C<open>
}
You may also, in the Bourne shell tradition, specify an EXPR beginning
-with C<'E<gt>&'>, in which case the rest of the string is interpreted as the
+with C<< '>&' >>, in which case the rest of the string is interpreted as the
name of a filehandle (or file descriptor, if numeric) to be
-duped and opened. You may use C<&> after C<E<gt>>, C<E<gt>E<gt>>,
-C<E<lt>>, C<+E<gt>>, C<+E<gt>E<gt>>, and C<+E<lt>>. The
+duped and opened. You may use C<&> after C<< > >>, C<<< >> >>>,
+C<< < >>, C<< +> >>, C<<< +>> >>>, and C<< +< >>. The
mode you specify should match the mode of the original filehandle.
(Duping a filehandle does not take into account any existing contents of
stdio buffers.) Duping file handles is not yet supported for 3-argument
print STDOUT "stdout 2\n";
print STDERR "stderr 2\n";
-If you specify C<'E<lt>&=N'>, where C<N> is a number, then Perl will do an
+If you specify C<< '<&=N' >>, where C<N> is a number, then Perl will do an
equivalent of C's C<fdopen> of that file descriptor; this is more
parsimonious of file descriptors. For example:
because they obey the native byteorder and endianness. For example a
4-byte integer 0x12345678 (305419896 decimal) would be ordered natively
(arranged in and handled by the CPU registers) into bytes as
-
+
0x12 0x34 0x56 0x78 # big-endian
0x78 0x56 0x34 0x12 # little-endian
-
+
Basically, the Intel, Alpha, and VAX CPUs are little-endian, while
everybody else, for example Motorola m68k/88k, PPC, Sparc, HP PA,
Power, and Cray are big-endian. MIPS can be either: Digital used it
the classic "Gulliver's Travels" (via the paper "On Holy Wars and a
Plea for Peace" by Danny Cohen, USC/ISI IEN 137, April 1, 1980) and
the egg-eating habits of the Lilliputians.
-
+
Some systems may have even weirder byte orders such as
-
+
0x56 0x78 0x12 0x34
0x34 0x12 0x78 0x56
-
+
You can see your system's preference with
print join(" ", map { sprintf "%#02x", $_ }
context (i.e. file slurp mode), and when an empty file is read, it
returns C<''> the first time, followed by C<undef> subsequently.
-This is the internal function implementing the C<E<lt>EXPRE<gt>>
-operator, but you can use it directly. The C<E<lt>EXPRE<gt>>
+This is the internal function implementing the C<< <EXPR> >>
+operator, but you can use it directly. The C<< <EXPR> >>
operator is discussed in more detail in L<perlop/"I/O Operators">.
$line = <STDIN>;
EOF on your read, and then sleep for a while, you might have to stick in a
seek() to reset things. The C<seek> doesn't change the current position,
but it I<does> clear the end-of-file condition on the handle, so that the
-next C<E<lt>FILEE<gt>> makes Perl try again to read something. We hope.
+next C<< <FILE> >> makes Perl try again to read something. We hope.
If that doesn't work (some stdios are particularly cantankerous), then
you may need something more like this:
select(undef, undef, undef, 0.25);
B<WARNING>: One should not attempt to mix buffered I/O (like C<read>
-or E<lt>FHE<gt>) with C<select>, except as permitted by POSIX, and even
+or <FH>) with C<select>, except as permitted by POSIX, and even
then only on POSIX systems. You have to use C<sysread> instead.
=item semctl ID,SEMNUM,CMD,ARG
is omitted, C<sort>s in standard string comparison order. If SUBNAME is
specified, it gives the name of a subroutine that returns an integer
less than, equal to, or greater than C<0>, depending on how the elements
-of the list are to be ordered. (The C<E<lt>=E<gt>> and C<cmp>
+of the list are to be ordered. (The C<< <=> >> and C<cmp>
operators are extremely useful in such routines.) SUBNAME may be a
scalar variable name (unsubscripted), in which case the value provides
the name of (or a reference to) the actual subroutine to use. In place
||
$a->[2] cmp $b->[2]
} map { [$_, /=(\d+)/, uc($_)] } @old;
-
+
# using a prototype allows you to use any comparison subroutine
# as a sort subroutine (including other package's subroutines)
package other;
ID, among other things. In versions of Perl prior to 5.004 the default
seed was just the current C<time>. This isn't a particularly good seed,
so many old programs supply their own seed value (often C<time ^ $$> or
-C<time ^ ($$ + ($$ E<lt>E<lt> 15))>), but that isn't necessary any more.
+C<time ^ ($$ + ($$ << 15))>), but that isn't necessary any more.
In fact, it's usually not necessary to call C<srand> at all, because if
it is not called explicitly, it is called implicitly at the first use of
S_IRWXU S_IRUSR S_IWUSR S_IXUSR
S_IRWXG S_IRGRP S_IWGRP S_IXGRP
S_IRWXO S_IROTH S_IWOTH S_IXOTH
-
+
# Setuid/Setgid/Stickiness.
S_ISUID S_ISGID S_ISVTX S_ISTXT
sub ordinal { unpack("c",$_[0]); } # same as ord()
In addition to fields allowed in pack(), you may prefix a field with
-a %E<lt>numberE<gt> to indicate that
-you want a E<lt>numberE<gt>-bit checksum of the items instead of the items
+a %<number> to indicate that
+you want a <number>-bit checksum of the items instead of the items
themselves. Default is a 16-bit checksum. Checksum is calculated by
summing numeric values of expanded values (for string fields the sum of
C<ord($char)> is taken, for bit fields the sum of zeroes and ones).
=head2 The Arrow Operator
-"C<-E<gt>>" is an infix dereference operator, just as it is in C
+"C<< -> >>" is an infix dereference operator, just as it is in C
and C++. If the right side is either a C<[...]>, C<{...}>, or a
C<(...)> subscript, then the left side must be either a hard or
symbolic reference to an array, a hash, or a subroutine respectively.
=head2 Relational Operators
-Binary "E<lt>" returns true if the left argument is numerically less than
+Binary "<" returns true if the left argument is numerically less than
the right argument.
-Binary "E<gt>" returns true if the left argument is numerically greater
+Binary ">" returns true if the left argument is numerically greater
than the right argument.
-Binary "E<lt>=" returns true if the left argument is numerically less than
+Binary "<=" returns true if the left argument is numerically less than
or equal to the right argument.
-Binary "E<gt>=" returns true if the left argument is numerically greater
+Binary ">=" returns true if the left argument is numerically greater
than or equal to the right argument.
Binary "lt" returns true if the left argument is stringwise less than
Binary "!=" returns true if the left argument is numerically not equal
to the right argument.
-Binary "E<lt>=E<gt>" returns -1, 0, or 1 depending on whether the left
+Binary "<=>" returns -1, 0, or 1 depending on whether the left
argument is numerically less than, equal to, or greater than the right
argument.
In list context, it's just the list argument separator, and inserts
both its arguments into the list.
-The =E<gt> digraph is mostly just a synonym for the comma operator. It's useful for
+The => digraph is mostly just a synonym for the comma operator. It's useful for
documenting arguments that come in pairs. As of release 5.001, it also forces
any word to the left of it to be interpreted as a string.
that
q{foo{bar}baz}
-
+
is the same as
'foo{bar}baz'
text is not evaluated as a command. If the
PATTERN is delimited by bracketing quotes, the REPLACEMENT has its own
pair of quotes, which may or may not be bracketing quotes, e.g.,
-C<s(foo)(bar)> or C<sE<lt>fooE<gt>/bar/>. A C</e> will cause the
+C<s(foo)(bar)> or C<< s<foo>/bar/ >>. A C</e> will cause the
replacement portion to be interpreted as a full-fledged Perl expression
and eval()ed right then and there. It is, however, syntax checked at
compile-time.
s/([^ ]*) *([^ ]*)/$2 $1/; # reverse 1st two fields
Note the use of $ instead of \ in the last example. Unlike
-B<sed>, we use the \E<lt>I<digit>E<gt> form in only the left hand side.
-Anywhere else it's $E<lt>I<digit>E<gt>.
+B<sed>, we use the \<I<digit>> form in only the left hand side.
+Anywhere else it's $<I<digit>>.
Occasionally, you can't use just a C</g> to get all the changes
to occur that you might want. Here are two common cases:
The first pass is finding the end of the quoted construct, whether
it be a multicharacter delimiter C<"\nEOF\n"> in the C<<<EOF>
construct, a C</> that terminates a C<qq//> construct, a C<]> which
-terminates C<qq[]> construct, or a C<E<gt>> which terminates a
-fileglob started with C<E<lt>>.
+terminates C<qq[]> construct, or a C<< > >> which terminates a
+fileglob started with C<< < >>.
When searching for single-character non-pairing delimiters, such
as C</>, combinations of C<\\> and C<\/> are skipped. However,
The only interpolation is removal of C<\> from pairs C<\\>.
-=item C<"">, C<``>, C<qq//>, C<qx//>, C<<file*globE<gt>>
+=item C<"">, C<``>, C<qq//>, C<qx//>, C<< <file*glob> >>
C<\Q>, C<\U>, C<\u>, C<\L>, C<\l> (possibly paired with C<\E>) are
converted to corresponding Perl constructs. Thus, C<"$foo\Qbaz$bar">
Note also that the interpolation code needs to make a decision on
where the interpolated scalar ends. For instance, whether
-C<"a $b -E<gt> {c}"> really means:
+C<< "a $b -> {c}" >> really means:
"a " . $b . " -> {c}";
while (($_ = <STDIN>) ne '0') { ... }
while (<STDIN>) { last unless $_; ... }
-In other boolean contexts, C<E<lt>I<filehandle>E<gt>> without an
+In other boolean contexts, C<< <I<filehandle>> >> without an
explicit C<defined> test or comparison elicit a warning if the B<-w>
command-line switch (the C<$^W> variable) is in effect.
the open() function, amongst others. See L<perlopentut> and
L<perlfunc/open> for details on this.
-If a E<lt>FILEHANDLEE<gt> is used in a context that is looking for
+If a <FILEHANDLE> is used in a context that is looking for
a list, a list comprising all input lines is returned, one line per
list element. It's easy to grow to a rather large data space this
way, so use with care.
-E<lt>FILEHANDLEE<gt> may also be spelled C<readline(*FILEHANDLE)>.
+<FILEHANDLE> may also be spelled C<readline(*FILEHANDLE)>.
See L<perlfunc/readline>.
-The null filehandle E<lt>E<gt> is special: it can be used to emulate the
-behavior of B<sed> and B<awk>. Input from E<lt>E<gt> comes either from
+The null filehandle <> is special: it can be used to emulate the
+behavior of B<sed> and B<awk>. Input from <> comes either from
standard input, or from each file listed on the command line. Here's
-how it works: the first time E<lt>E<gt> is evaluated, the @ARGV array is
+how it works: the first time <> is evaluated, the @ARGV array is
checked, and if it is empty, C<$ARGV[0]> is set to "-", which when opened
gives you standard input. The @ARGV array is then processed as a list
of filenames. The loop
except that it isn't so cumbersome to say, and will actually work.
It really does shift the @ARGV array and put the current filename
into the $ARGV variable. It also uses filehandle I<ARGV>
-internally--E<lt>E<gt> is just a synonym for E<lt>ARGVE<gt>, which
+internally--<> is just a synonym for <ARGV>, which
is magical. (The pseudo code above doesn't work because it treats
-E<lt>ARGVE<gt> as non-magical.)
+<ARGV> as non-magical.)
-You can modify @ARGV before the first E<lt>E<gt> as long as the array ends up
+You can modify @ARGV before the first <> as long as the array ends up
containing the list of filenames you really want. Line numbers (C<$.>)
continue as though the input were one big happy file. See the example
in L<perlfunc/eof> for how to reset line numbers on each file.
# ... # code for each line
}
-The E<lt>E<gt> symbol will return C<undef> for end-of-file only once.
+The <> symbol will return C<undef> for end-of-file only once.
If you call it again after this, it will assume you are processing another
@ARGV list, and if you haven't set @ARGV, will read input from STDIN.
If what the angle brackets contain is a simple scalar variable (e.g.,
-E<lt>$fooE<gt>), then that variable contains the name of the
+<$foo>), then that variable contains the name of the
filehandle to input from, or its typeglob, or a reference to the
same. For example:
reference, it is interpreted as a filename pattern to be globbed, and
either a list of filenames or the next filename in the list is returned,
depending on context. This distinction is determined on syntactic
-grounds alone. That means C<E<lt>$xE<gt>> is always a readline() from
-an indirect handle, but C<E<lt>$hash{key}E<gt>> is always a glob().
+grounds alone. That means C<< <$x> >> is always a readline() from
+an indirect handle, but C<< <$hash{key}> >> is always a glob().
That's because $x is a simple scalar variable, but C<$hash{key}> is
not--it's a hash element.
One level of double-quote interpretation is done first, but you can't
-say C<E<lt>$fooE<gt>> because that's an indirect filehandle as explained
+say C<< <$foo> >> because that's an indirect filehandle as explained
in the previous paragraph. (In older versions of Perl, programmers
would insert curly brackets to force interpretation as a filename glob:
-C<E<lt>${foo}E<gt>>. These days, it's considered cleaner to call the
+C<< <${foo}> >>. These days, it's considered cleaner to call the
internal function directly as C<glob($foo)>, which is probably the right
way to have done it in the first place.) For example: