local-lib5/man/man3/perlpodspec.3pm

   1 .\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.3
   2 .\"
   3 .\" Standard preamble:
   4 .\" ========================================================================
   5 .de Sh \" Subsection heading
   6 .br
   7 .if t .Sp
   8 .ne 5
   9 .PP
  10 \fB\\$1\fR
  11 .PP
  12 ..
  13 .de Sp \" Vertical space (when we can't use .PP)
  14 .if t .sp .5v
  15 .if n .sp
  16 ..
  17 .de Vb \" Begin verbatim text
  18 .ft CW
  19 .nf
  20 .ne \\$1
  21 ..
  22 .de Ve \" End verbatim text
  23 .ft R
  24 .fi
  25 ..
  26 .\" Set up some character translations and predefined strings.  \*(-- will
  27 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
  28 .\" double quote, and \*(R" will give a right double quote.  | will give a
  29 .\" real vertical bar.  \*(C+ will give a nicer C++.  Capital omega is used to
  30 .\" do unbreakable dashes and therefore won't be available.  \*(C` and \*(C'
  31 .\" expand to `' in nroff, nothing in troff, for use with C<>.
  32 .tr \(*W-|\(bv\*(Tr
  33 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
  34 .ie n \{\
  35 .    ds -- \(*W-
  36 .    ds PI pi
  37 .    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
  38 .    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
  39 .    ds L" ""
  40 .    ds R" ""
  41 .    ds C` ""
  42 .    ds C' ""
  43 'br\}
  44 .el\{\
  45 .    ds -- \|\(em\|
  46 .    ds PI \(*p
  47 .    ds L" ``
  48 .    ds R" ''
  49 'br\}
  50 .\"
  51 .\" If the F register is turned on, we'll generate index entries on stderr for
  52 .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
  53 .\" entries marked with X<> in POD.  Of course, you'll have to process the
  54 .\" output yourself in some meaningful fashion.
  55 .if \nF \{\
  56 .    de IX
  57 .    tm Index:\\$1\t\\n%\t"\\$2"
  58 ..
  59 .    nr % 0
  60 .    rr F
  61 .\}
  62 .\"
  63 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
  64 .\" way too many mistakes in technical documents.
  65 .hy 0
  66 .if n .na
  67 .\"
  68 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
  69 .\" Fear.  Run.  Save yourself.  No user-serviceable parts.
  70 .    \" fudge factors for nroff and troff
  71 .if n \{\
  72 .    ds #H 0
  73 .    ds #V .8m
  74 .    ds #F .3m
  75 .    ds #[ \f1
  76 .    ds #] \fP
  77 .\}
  78 .if t \{\
  79 .    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
  80 .    ds #V .6m
  81 .    ds #F 0
  82 .    ds #[ \&
  83 .    ds #] \&
  84 .\}
  85 .    \" simple accents for nroff and troff
  86 .if n \{\
  87 .    ds ' \&
  88 .    ds ` \&
  89 .    ds ^ \&
  90 .    ds , \&
  91 .    ds ~ ~
  92 .    ds /
  93 .\}
  94 .if t \{\
  95 .    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
  96 .    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
  97 .    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
  98 .    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
  99 .    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
 100 .    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
 101 .\}
 102 .    \" troff and (daisy-wheel) nroff accents
 103 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
 104 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
 105 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
 106 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
 107 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
 108 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
 109 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
 110 .ds ae a\h'-(\w'a'u*4/10)'e
 111 .ds Ae A\h'-(\w'A'u*4/10)'E
 112 .    \" corrections for vroff
 113 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
 114 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
 115 .    \" for low resolution devices (crt and lpr)
 116 .if \n(.H>23 .if \n(.V>19 \
 117 \{\
 118 .    ds : e
 119 .    ds 8 ss
 120 .    ds o a
 121 .    ds d- d\h'-1'\(ga
 122 .    ds D- D\h'-1'\(hy
 123 .    ds th \o'bp'
 124 .    ds Th \o'LP'
 125 .    ds ae ae
 126 .    ds Ae AE
 127 .\}
 128 .rm #[ #] #H #V #F C
 129 .\" ========================================================================
 130 .\"
 131 .IX Title "perlpodspec 3"
 132 .TH perlpodspec 3 "2009-10-26" "perl v5.8.7" "User Contributed Perl Documentation"
 133 .SH "NAME"
 134 perlpodspec \- Plain Old Documentation: format specification and notes
 135 .SH "DESCRIPTION"
 136 .IX Header "DESCRIPTION"
 137 This document is detailed notes on the Pod markup language.  Most
 138 people will only have to read perlpod to know how to write
 139 in Pod, but this document may answer some incidental questions to do
 140 with parsing and rendering Pod.
 141 .PP
 142 In this document, \*(L"must\*(R" / \*(L"must not\*(R", \*(L"should\*(R" /
 143 \&\*(L"should not\*(R", and \*(L"may\*(R" have their conventional (cf. \s-1RFC\s0 2119)
 144 meanings: \*(L"X must do Y\*(R" means that if X doesn't do Y, it's against
 145 this specification, and should really be fixed.  \*(L"X should do Y\*(R"
 146 means that it's recommended, but X may fail to do Y, if there's a
 147 good reason.  \*(L"X may do Y\*(R" is merely a note that X can do Y at
 148 will (although it is up to the reader to detect any connotation of
 149 \&\*(L"and I think it would be \fInice\fR if X did Y\*(R" versus \*(L"it wouldn't
 150 really \fIbother\fR me if X did Y\*(R").
 151 .PP
 152 Notably, when I say \*(L"the parser should do Y\*(R", the
 153 parser may fail to do Y, if the calling application explicitly
 154 requests that the parser \fInot\fR do Y.  I often phrase this as
 155 \&\*(L"the parser should, by default, do Y.\*(R"  This doesn't \fIrequire\fR
 156 the parser to provide an option for turning off whatever
 157 feature Y is (like expanding tabs in verbatim paragraphs), although
 158 it implies that such an option \fImay\fR be provided.
 159 .SH "Pod Definitions"
 160 .IX Header "Pod Definitions"
 161 Pod is embedded in files, typically Perl source files \*(-- although you
 162 can write a file that's nothing but Pod.
 163 .PP
 164 A \fBline\fR in a file consists of zero or more non-newline characters,
 165 terminated by either a newline or the end of the file.
 166 .PP
 167 A \fBnewline sequence\fR is usually a platform-dependent concept, but
 168 Pod parsers should understand it to mean any of \s-1CR\s0 (\s-1ASCII\s0 13), \s-1LF\s0
 169 (\s-1ASCII\s0 10), or a \s-1CRLF\s0 (\s-1ASCII\s0 13 followed immediately by \s-1ASCII\s0 10), in
 170 addition to any other system-specific meaning.  The first \s-1CR/CRLF/LF\s0
 171 sequence in the file may be used as the basis for identifying the
 172 newline sequence for parsing the rest of the file.
 173 .PP
 174 A \fBblank line\fR is a line consisting entirely of zero or more spaces
 175 (\s-1ASCII\s0 32) or tabs (\s-1ASCII\s0 9), and terminated by a newline or end\-of\-file.
 176 A \fBnon-blank line\fR is a line containing one or more characters other
 177 than space or tab (and terminated by a newline or end\-of\-file).
 178 .PP
 179 (\fINote:\fR Many older Pod parsers did not accept a line consisting of
 180 spaces/tabs and then a newline as a blank line \*(-- the only lines they
 181 considered blank were lines consisting of \fIno characters at all\fR,
 182 terminated by a newline.)
 183 .PP
 184 \&\fBWhitespace\fR is used in this document as a blanket term for spaces,
 185 tabs, and newline sequences.  (By itself, this term usually refers
 186 to literal whitespace.  That is, sequences of whitespace characters
 187 in Pod source, as opposed to "E<32>", which is a formatting
 188 code that \fIdenotes\fR a whitespace character.)
 189 .PP
 190 A \fBPod parser\fR is a module meant for parsing Pod (regardless of
 191 whether this involves calling callbacks or building a parse tree or
 192 directly formatting it).  A \fBPod formatter\fR (or \fBPod translator\fR)
 193 is a module or program that converts Pod to some other format (\s-1HTML\s0,
 194 plaintext, TeX, PostScript, \s-1RTF\s0).  A \fBPod processor\fR might be a
 195 formatter or translator, or might be a program that does something
 196 else with the Pod (like counting words, scanning for index points,
 197 etc.).
 198 .PP
 199 Pod content is contained in \fBPod blocks\fR.  A Pod block starts with a
 200 line that matches \f(CW\*(C`m/\eA=[a\-zA\-Z]/\*(C'\fR, and continues up to the next line
 201 that matches \f(CW\*(C`m/\eA=cut/\*(C'\fR \*(-- or up to the end of the file, if there is
 202 no \f(CW\*(C`m/\eA=cut/\*(C'\fR line.
 203 .PP
 204 Within a Pod block, there are \fBPod paragraphs\fR.  A Pod paragraph
 205 consists of non-blank lines of text, separated by one or more blank
 206 lines.
 207 .PP
 208 For purposes of Pod processing, there are four types of paragraphs in
 209 a Pod block:
 210 .IP "\(bu" 4
 211 A command paragraph (also called a \*(L"directive\*(R").  The first line of
 212 this paragraph must match \f(CW\*(C`m/\eA=[a\-zA\-Z]/\*(C'\fR.  Command paragraphs are
 213 typically one line, as in:
 214 .Sp
 215 .Vb 1
 216 \&  =head1 NOTES
 217 .Ve
 218 .Sp
 219 .Vb 1
 220 \&  =item *
 221 .Ve
 222 .Sp
 223 But they may span several (non\-blank) lines:
 224 .Sp
 225 .Vb 3
 226 \&  =for comment
 227 \&  Hm, I wonder what it would look like if
 228 \&  you tried to write a BNF for Pod from this.
 229 .Ve
 230 .Sp
 231 .Vb 2
 232 \&  =head3 Dr. Strangelove, or: How I Learned to
 233 \&  Stop Worrying and Love the Bomb
 234 .Ve
 235 .Sp
 236 \&\fISome\fR command paragraphs allow formatting codes in their content
 237 (i.e., after the part that matches \f(CW\*(C`m/\eA=[a\-zA\-Z]\eS*\es*/\*(C'\fR), as in:
 238 .Sp
 239 .Vb 1
 240 \&  =head1 Did You Remember to C<use strict;>?
 241 .Ve
 242 .Sp
 243 In other words, the Pod processing handler for \*(L"head1\*(R" will apply the
 244 same processing to \*(L"Did You Remember to C<use strict;>?\*(R" that it
 245 would to an ordinary paragraph \*(-- i.e., formatting codes (like
 246 \&\*(L"C<...>\*(R") are parsed and presumably formatted appropriately, and
 247 whitespace in the form of literal spaces and/or tabs is not
 248 significant.
 249 .IP "\(bu" 4
 250 A \fBverbatim paragraph\fR.  The first line of this paragraph must be a
 251 literal space or tab, and this paragraph must not be inside a \*(L"=begin
 252 \&\fIidentifier\fR\*(R", ... \*(L"=end \fIidentifier\fR\*(R" sequence unless
 253 \&\*(L"\fIidentifier\fR\*(R" begins with a colon (\*(L":\*(R").  That is, if a paragraph
 254 starts with a literal space or tab, but \fIis\fR inside a
 255 \&\*(L"=begin \fIidentifier\fR\*(R", ... \*(L"=end \fIidentifier\fR\*(R" region, then it's
 256 a data paragraph, unless \*(L"\fIidentifier\fR\*(R" begins with a colon.
 257 .Sp
 258 Whitespace \fIis\fR significant in verbatim paragraphs (although, in
 259 processing, tabs are probably expanded).
 260 .IP "\(bu" 4
 261 An \fBordinary paragraph\fR.  A paragraph is an ordinary paragraph
 262 if its first line matches neither \f(CW\*(C`m/\eA=[a\-zA\-Z]/\*(C'\fR nor
 263 \&\f(CW\*(C`m/\eA[ \et]/\*(C'\fR, \fIand\fR if it's not inside a \*(L"=begin \fIidentifier\fR\*(R",
 264 \&... \*(L"=end \fIidentifier\fR\*(R" sequence unless \*(L"\fIidentifier\fR\*(R" begins with
 265 a colon (\*(L":\*(R").
 266 .IP "\(bu" 4
 267 A \fBdata paragraph\fR.  This is a paragraph that \fIis\fR inside a \*(L"=begin
 268 \&\fIidentifier\fR\*(R" ... \*(L"=end \fIidentifier\fR\*(R" sequence where
 269 \&\*(L"\fIidentifier\fR\*(R" does \fInot\fR begin with a literal colon (\*(L":\*(R").  In
 270 some sense, a data paragraph is not part of Pod at all (i.e.,
 271 effectively it's \*(L"out\-of\-band\*(R"), since it's not subject to most kinds
 272 of Pod parsing; but it is specified here, since Pod
 273 parsers need to be able to call an event for it, or store it in some
 274 form in a parse tree, or at least just parse \fIaround\fR it.
 275 .PP
 276 For example: consider the following paragraphs:
 277 .PP
 278 .Vb 1
 279 \&  # <\- that's the 0th column
 280 .Ve
 281 .PP
 282 .Vb 1
 283 \&  =head1 Foo
 284 .Ve
 285 .PP
 286 .Vb 1
 287 \&  Stuff
 288 .Ve
 289 .PP
 290 .Vb 1
 291 \&    $foo\->bar
 292 .Ve
 293 .PP
 294 .Vb 1
 295 \&  =cut
 296 .Ve
 297 .PP
 298 Here, \*(L"=head1 Foo\*(R" and \*(L"=cut\*(R" are command paragraphs because the first
 299 line of each matches \f(CW\*(C`m/\eA=[a\-zA\-Z]/\*(C'\fR.  \*(L"\fI[space][space]\fR$foo\->bar\*(R"
 300 is a verbatim paragraph, because its first line starts with a literal
 301 whitespace character (and there's no \*(L"=begin\*(R"...\*(L"=end\*(R" region around).
 302 .PP
 303 The \*(L"=begin \fIidentifier\fR\*(R" ... \*(L"=end \fIidentifier\fR\*(R" commands stop
 304 paragraphs that they surround from being parsed as ordinary or verbatim
 305 paragraphs, if \fIidentifier\fR doesn't begin with a colon.  This
 306 is discussed in detail in the section
 307 \&\*(L"About Data Paragraphs and \*(L"=begin/=end\*(R" Regions\*(R".
 308 .SH "Pod Commands"
 309 .IX Header "Pod Commands"
 310 This section is intended to supplement and clarify the discussion in
 311 \&\*(L"Command Paragraph\*(R" in perlpod.  These are the currently recognized
 312 Pod commands:
 313 .ie n .IP """=head1"", ""=head2"", ""=head3"", ""=head4""" 4
 314 .el .IP "``=head1'', ``=head2'', ``=head3'', ``=head4''" 4
 315 .IX Item "=head1, =head2, =head3, =head4"
 316 This command indicates that the text in the remainder of the paragraph
 317 is a heading.  That text may contain formatting codes.  Examples:
 318 .Sp
 319 .Vb 1
 320 \&  =head1 Object Attributes
 321 .Ve
 322 .Sp
 323 .Vb 1
 324 \&  =head3 What B<Not> to Do!
 325 .Ve
 326 .ie n .IP """=pod""" 4
 327 .el .IP "``=pod''" 4
 328 .IX Item "=pod"
 329 This command indicates that this paragraph begins a Pod block.  (If we
 330 are already in the middle of a Pod block, this command has no effect at
 331 all.)  If there is any text in this command paragraph after \*(L"=pod\*(R",
 332 it must be ignored.  Examples:
 333 .Sp
 334 .Vb 1
 335 \&  =pod
 336 .Ve
 337 .Sp
 338 .Vb 1
 339 \&  This is a plain Pod paragraph.
 340 .Ve
 341 .Sp
 342 .Vb 1
 343 \&  =pod This text is ignored.
 344 .Ve
 345 .ie n .IP """=cut""" 4
 346 .el .IP "``=cut''" 4
 347 .IX Item "=cut"
 348 This command indicates that this line is the end of this previously
 349 started Pod block.  If there is any text after \*(L"=cut\*(R" on the line, it must be
 350 ignored.  Examples:
 351 .Sp
 352 .Vb 1
 353 \&  =cut
 354 .Ve
 355 .Sp
 356 .Vb 1
 357 \&  =cut The documentation ends here.
 358 .Ve
 359 .Sp
 360 .Vb 3
 361 \&  =cut
 362 \&  # This is the first line of program text.
 363 \&  sub foo { # This is the second.
 364 .Ve
 365 .Sp
 366 It is an error to try to \fIstart\fR a Pod block with a \*(L"=cut\*(R" command.  In
 367 that case, the Pod processor must halt parsing of the input file, and
 368 must by default emit a warning.
 369 .ie n .IP """=over""" 4
 370 .el .IP "``=over''" 4
 371 .IX Item "=over"
 372 This command indicates that this is the start of a list/indent
 373 region.  If there is any text following the \*(L"=over\*(R", it must consist
 374 of only a nonzero positive numeral.  The semantics of this numeral is
 375 explained in the \*(L"About =over...=back Regions\*(R" section, further
 376 below.  Formatting codes are not expanded.  Examples:
 377 .Sp
 378 .Vb 1
 379 \&  =over 3
 380 .Ve
 381 .Sp
 382 .Vb 1
 383 \&  =over 3.5
 384 .Ve
 385 .Sp
 386 .Vb 1
 387 \&  =over
 388 .Ve
 389 .ie n .IP """=item""" 4
 390 .el .IP "``=item''" 4
 391 .IX Item "=item"
 392 This command indicates that an item in a list begins here.  Formatting
 393 codes are processed.  The semantics of the (optional) text in the
 394 remainder of this paragraph are
 395 explained in the \*(L"About =over...=back Regions\*(R" section, further
 396 below.  Examples:
 397 .Sp
 398 .Vb 1
 399 \&  =item
 400 .Ve
 401 .Sp
 402 .Vb 1
 403 \&  =item *
 404 .Ve
 405 .Sp
 406 .Vb 1
 407 \&  =item      *
 408 .Ve
 409 .Sp
 410 .Vb 1
 411 \&  =item 14
 412 .Ve
 413 .Sp
 414 .Vb 1
 415 \&  =item   3.
 416 .Ve
 417 .Sp
 418 .Vb 1
 419 \&  =item C<< $thing\->stuff(I<dodad>) >>
 420 .Ve
 421 .Sp
 422 .Vb 2
 423 \&  =item For transporting us beyond seas to be tried for pretended
 424 \&  offenses
 425 .Ve
 426 .Sp
 427 .Vb 5
 428 \&  =item He is at this time transporting large armies of foreign
 429 \&  mercenaries to complete the works of death, desolation and
 430 \&  tyranny, already begun with circumstances of cruelty and perfidy
 431 \&  scarcely paralleled in the most barbarous ages, and totally
 432 \&  unworthy the head of a civilized nation.
 433 .Ve
 434 .ie n .IP """=back""" 4
 435 .el .IP "``=back''" 4
 436 .IX Item "=back"
 437 This command indicates that this is the end of the region begun
 438 by the most recent \*(L"=over\*(R" command.  It permits no text after the
 439 \&\*(L"=back\*(R" command.
 440 .ie n .IP """=begin formatname""" 4
 441 .el .IP "``=begin formatname''" 4
 442 .IX Item "=begin formatname"
 443 This marks the following paragraphs (until the matching \*(L"=end
 444 formatname\*(R") as being for some special kind of processing.  Unless
 445 \&\*(L"formatname\*(R" begins with a colon, the contained non-command
 446 paragraphs are data paragraphs.  But if \*(L"formatname\*(R" \fIdoes\fR begin
 447 with a colon, then non-command paragraphs are ordinary paragraphs
 448 or verbatim paragraphs.  This is discussed in detail in the section
 449 \&\*(L"About Data Paragraphs and \*(L"=begin/=end\*(R" Regions\*(R".
 450 .Sp
 451 It is advised that formatnames match the regexp
 452 \&\f(CW\*(C`m/\eA:?[\-a\-zA\-Z0\-9_]+\ez/\*(C'\fR.  Implementors should anticipate future
 453 expansion in the semantics and syntax of the first parameter
 454 to \*(L"=begin\*(R"/\*(L"=end\*(R"/\*(L"=for\*(R".
 455 .ie n .IP """=end formatname""" 4
 456 .el .IP "``=end formatname''" 4
 457 .IX Item "=end formatname"
 458 This marks the end of the region opened by the matching
 459 \&\*(L"=begin formatname\*(R" region.  If \*(L"formatname\*(R" is not the formatname
 460 of the most recent open \*(L"=begin formatname\*(R" region, then this
 461 is an error, and must generate an error message.  This
 462 is discussed in detail in the section
 463 \&\*(L"About Data Paragraphs and \*(L"=begin/=end\*(R" Regions\*(R".
 464 .ie n .IP """=for formatname text...""" 4
 465 .el .IP "``=for formatname text...''" 4
 466 .IX Item "=for formatname text..."
 467 This is synonymous with:
 468 .Sp
 469 .Vb 1
 470 \&     =begin formatname
 471 .Ve
 472 .Sp
 473 .Vb 1
 474 \&     text...
 475 .Ve
 476 .Sp
 477 .Vb 1
 478 \&     =end formatname
 479 .Ve
 480 .Sp
 481 That is, it creates a region consisting of a single paragraph; that
 482 paragraph is to be treated as a normal paragraph if \*(L"formatname\*(R"
 483 begins with a \*(L":\*(R"; if \*(L"formatname\*(R" \fIdoesn't\fR begin with a colon,
 484 then \*(L"text...\*(R" will constitute a data paragraph.  There is no way
 485 to use \*(L"=for formatname text...\*(R" to express \*(L"text...\*(R" as a verbatim
 486 paragraph.
 487 .ie n .IP """=encoding encodingname""" 4
 488 .el .IP "``=encoding encodingname''" 4
 489 .IX Item "=encoding encodingname"
 490 This command, which should occur early in the document (at least
 491 before any non-US-ASCII data!), declares that this document is
 492 encoded in the encoding \fIencodingname\fR, which must be
 493 an encoding name that Encode recognizes.  (Encode's list
 494 of supported encodings, in Encode::Supported, is useful here.)
 495 If the Pod parser cannot decode the declared encoding, it
 496 should emit a warning and may abort parsing the document
 497 altogether.
 498 .Sp
 499 A document having more than one \*(L"=encoding\*(R" line should be
 500 considered an error.  Pod processors may silently tolerate this if
 501 the not-first \*(L"=encoding\*(R" lines are just duplicates of the
 502 first one (e.g., if there's a \*(L"=encoding utf8\*(R" line, and later on
 503 another \*(L"=encoding utf8\*(R" line).  But Pod processors should complain if
 504 there are contradictory \*(L"=encoding\*(R" lines in the same document
 505 (e.g., if there is a \*(L"=encoding utf8\*(R" early in the document and
 506 \&\*(L"=encoding big5\*(R" later).  Pod processors that recognize BOMs
 507 may also complain if they see an \*(L"=encoding\*(R" line
 508 that contradicts the \s-1BOM\s0 (e.g., if a document with a \s-1UTF\-16LE\s0
 509 \&\s-1BOM\s0 has an \*(L"=encoding shiftjis\*(R" line).
 510 .PP
 511 If a Pod processor sees any command other than the ones listed
 512 above (like \*(L"=head\*(R", or \*(L"=haed1\*(R", or \*(L"=stuff\*(R", or \*(L"=cuttlefish\*(R",
 513 or \*(L"=w123\*(R"), that processor must by default treat this as an
 514 error.  It must not process the paragraph beginning with that
 515 command, must by default warn of this as an error, and may
 516 abort the parse.  A Pod parser may allow a way for particular
 517 applications to add to the above list of known commands, and to
 518 stipulate, for each additional command, whether formatting
 519 codes should be processed.
 520 .PP
 521 Future versions of this specification may add additional
 522 commands.
 523 .SH "Pod Formatting Codes"
 524 .IX Header "Pod Formatting Codes"
 525 (Note that in previous drafts of this document and of perlpod,
 526 formatting codes were referred to as \*(L"interior sequences\*(R", and
 527 this term may still be found in the documentation for Pod parsers,
 528 and in error messages from Pod processors.)
 529 .PP
 530 There are two syntaxes for formatting codes:
 531 .IP "\(bu" 4
 532 A formatting code starts with a capital letter (just US-ASCII [A\-Z])
 533 followed by a \*(L"<\*(R", any number of characters, and ending with the first
 534 matching \*(L">\*(R".  Examples:
 535 .Sp
 536 .Vb 1
 537 \&    That's what I<you> think!
 538 .Ve
 539 .Sp
 540 .Vb 1
 541 \&    What's C<dump()> for?
 542 .Ve
 543 .Sp
 544 .Vb 1
 545 \&    X<C<chmod> and C<unlink()> Under Different Operating Systems>
 546 .Ve
 547 .IP "\(bu" 4
 548 A formatting code starts with a capital letter (just US-ASCII [A\-Z])
 549 followed by two or more \*(L"<\*(R"'s, one or more whitespace characters,
 550 any number of characters, one or more whitespace characters,
 551 and ending with the first matching sequence of two or more \*(L">\*(R"'s, where
 552 the number of \*(L">\*(R"'s equals the number of \*(L"<\*(R"'s in the opening of this
 553 formatting code.  Examples:
 554 .Sp
 555 .Vb 1
 556 \&    That's what I<< you >> think!
 557 .Ve
 558 .Sp
 559 .Vb 1
 560 \&    C<<< open(X, ">>thing.dat") || die $! >>>
 561 .Ve
 562 .Sp
 563 .Vb 1
 564 \&    B<< $foo\->bar(); >>
 565 .Ve
 566 .Sp
 567 With this syntax, the whitespace character(s) after the \*(L"C<<<\*(R"
 568 and before the \*(L">>>\*(R" (or whatever letter) are \fInot\fR renderable \*(-- they
 569 do not signify whitespace, but are merely part of the formatting codes
 570 themselves.  That is, these are all synonymous:
 571 .Sp
 572 .Vb 7
 573 \&    C<thing>
 574 \&    C<< thing >>
 575 \&    C<<           thing     >>
 576 \&    C<<<   thing >>>
 577 \&    C<<<<
 578 \&    thing
 579 \&               >>>>
 580 .Ve
 581 .Sp
 582 and so on.
 583 .PP
 584 In parsing Pod, a notably tricky part is the correct parsing of
 585 (potentially nested!) formatting codes.  Implementors should
 586 consult the code in the \f(CW\*(C`parse_text\*(C'\fR routine in Pod::Parser as an
 587 example of a correct implementation.
 588 .ie n .IP """I<text>"" \*(-- italic text" 4
 589 .el .IP "\f(CWI<text>\fR \*(-- italic text" 4
 590 .IX Item "I<text>  italic text"
 591 See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod.
 592 .ie n .IP """B<text>"" \*(-- bold text" 4
 593 .el .IP "\f(CWB<text>\fR \*(-- bold text" 4
 594 .IX Item "B<text>  bold text"
 595 See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod.
 596 .ie n .IP """C<code>"" \*(-- code text" 4
 597 .el .IP "\f(CWC<code>\fR \*(-- code text" 4
 598 .IX Item "C<code>  code text"
 599 See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod.
 600 .ie n .IP """F<filename>"" \*(-- style for filenames" 4
 601 .el .IP "\f(CWF<filename>\fR \*(-- style for filenames" 4
 602 .IX Item "F<filename>  style for filenames"
 603 See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod.
 604 .ie n .IP """X<topic name>"" \*(-- an index entry" 4
 605 .el .IP "\f(CWX<topic name>\fR \*(-- an index entry" 4
 606 .IX Item "X<topic name>  an index entry"
 607 See the brief discussion in \*(L"Formatting Codes\*(R" in perlpod.
 608 .Sp
 609 This code is unusual in that most formatters completely discard
 610 this code and its content.  Other formatters will render it with
 611 invisible codes that can be used in building an index of
 612 the current document.
 613 .ie n .IP """Z<>"" \*(-- a null (zero\-effect) formatting code" 4
 614 .el .IP "\f(CWZ<>\fR \*(-- a null (zero\-effect) formatting code" 4
 615 .IX Item "Z<>  a null (zero-effect) formatting code"
 616 Discussed briefly in \*(L"Formatting Codes\*(R" in perlpod.
 617 .Sp
 618 This code is unusual in that it should have no content.  That is,
 619 a processor may complain if it sees \f(CW\*(C`Z<potatoes>\*(C'\fR.  Whether
 620 or not it complains, the \fIpotatoes\fR text should be ignored.
 621 .ie n .IP """L<name>"" \*(-- a hyperlink" 4
 622 .el .IP "\f(CWL<name>\fR \*(-- a hyperlink" 4
 623 .IX Item "L<name>  a hyperlink"
 624 The complicated syntaxes of this code are discussed at length in
 625 \&\*(L"Formatting Codes\*(R" in perlpod, and implementation details are
 626 discussed below, in \*(L"About L<...> Codes\*(R".  Parsing the
 627 contents of L<content> is tricky.  Notably, the content has to be
 628 checked for whether it looks like a \s-1URL\s0, or whether it has to be split
 629 on literal \*(L"|\*(R" and/or \*(L"/\*(R" (in the right order!), and so on,
 630 \&\fIbefore\fR E<...> codes are resolved.
 631 .ie n .IP """E<escape>"" \*(-- a character escape" 4
 632 .el .IP "\f(CWE<escape>\fR \*(-- a character escape" 4
 633 .IX Item "E<escape>  a character escape"
 634 See \*(L"Formatting Codes\*(R" in perlpod, and several points in
 635 \&\*(L"Notes on Implementing Pod Processors\*(R".
 636 .ie n .IP """S<text>"" \*(-- text contains non-breaking spaces" 4
 637 .el .IP "\f(CWS<text>\fR \*(-- text contains non-breaking spaces" 4
 638 .IX Item "S<text>  text contains non-breaking spaces"
 639 This formatting code is syntactically simple, but semantically
 640 complex.  What it means is that each space in the printable
 641 content of this code signifies a non-breaking space.
 642 .Sp
 643 Consider:
 644 .Sp
 645 .Vb 1
 646 \&    C<$x ? $y    :  $z>
 647 .Ve
 648 .Sp
 649 .Vb 1
 650 \&    S<C<$x ? $y     :  $z>>
 651 .Ve
 652 .Sp
 653 Both signify the monospace (c[ode] style) text consisting of
 654 \&\*(L"$x\*(R", one space, \*(L"?\*(R", one space, \*(L"$y\*(R", one space, \*(L":\*(R", one space, \*(L"$z\*(R".  The
 655 difference is that in the latter, with the S code, those spaces
 656 are not \*(L"normal\*(R" spaces, but instead are non-breaking spaces.
 657 .PP
 658 If a Pod processor sees any formatting code other than the ones
 659 listed above (as in \*(L"N<...>\*(R", or \*(L"Q<...>\*(R", etc.), that
 660 processor must by default treat this as an error.
 661 A Pod parser may allow a way for particular
 662 applications to add to the above list of known formatting codes;
 663 a Pod parser might even allow a way to stipulate, for each additional
 664 command, whether it requires some form of special processing, as
 665 L<...> does.
 666 .PP
 667 Future versions of this specification may add additional
 668 formatting codes.
 669 .PP
 670 Historical note:  A few older Pod processors would not see a \*(L">\*(R" as
 671 closing a \*(L"C<\*(R" code, if the \*(L">\*(R" was immediately preceded by
 672 a \*(L"\-\*(R".  This was so that this:
 673 .PP
 674 .Vb 1
 675 \&    C<$foo\->bar>
 676 .Ve
 677 .PP
 678 would parse as equivalent to this:
 679 .PP
 680 .Vb 1
 681 \&    C<$foo\-E<gt>bar>
 682 .Ve
 683 .PP
 684 instead of as equivalent to a \*(L"C\*(R" formatting code containing
 685 only \*(L"$foo\-\*(R", and then a \*(L"bar>\*(R" outside the \*(L"C\*(R" formatting code.  This
 686 problem has since been solved by the addition of syntaxes like this:
 687 .PP
 688 .Vb 1
 689 \&    C<< $foo\->bar >>
 690 .Ve
 691 .PP
 692 Compliant parsers must not treat \*(L"\->\*(R" as special.
 693 .PP
 694 Formatting codes absolutely cannot span paragraphs.  If a code is
 695 opened in one paragraph, and no closing code is found by the end of
 696 that paragraph, the Pod parser must close that formatting code,
 697 and should complain (as in \*(L"Unterminated I code in the paragraph
 698 starting at line 123: 'Time objects are not...'\*(R").  So these
 699 two paragraphs:
 700 .PP
 701 .Vb 1
 702 \&  I<I told you not to do this!
 703 .Ve
 704 .PP
 705 .Vb 1
 706 \&  Don't make me say it again!>
 707 .Ve
 708 .PP
 709 \&...must \fInot\fR be parsed as two paragraphs in italics (with the I
 710 code starting in one paragraph and ending in another.)  Instead,
 711 the first paragraph should generate a warning, but that aside, the
 712 above code must parse as if it were:
 713 .PP
 714 .Vb 1
 715 \&  I<I told you not to do this!>
 716 .Ve
 717 .PP
 718 .Vb 1
 719 \&  Don't make me say it again!E<gt>
 720 .Ve
 721 .PP
 722 (In SGMLish jargon, all Pod commands are like block-level
 723 elements, whereas all Pod formatting codes are like inline-level
 724 elements.)
 725 .SH "Notes on Implementing Pod Processors"
 726 .IX Header "Notes on Implementing Pod Processors"
 727 The following is a long section of miscellaneous requirements
 728 and suggestions to do with Pod processing.
 729 .IP "\(bu" 4
 730 Pod formatters should tolerate lines in verbatim blocks that are of
 731 any length, even if that means having to break them (possibly several
 732 times, for very long lines) to avoid text running off the side of the
 733 page.  Pod formatters may warn of such line\-breaking.  Such warnings
 734 are particularly appropriate for lines that are over 100 characters long, which
 735 are usually not intentional.
 736 .IP "\(bu" 4
 737 Pod parsers must recognize \fIall\fR of the three well-known newline
 738 formats: \s-1CR\s0, \s-1LF\s0, and \s-1CRLF\s0.  See perlport.
 739 .IP "\(bu" 4
 740 Pod parsers should accept input lines that are of any length.
 741 .IP "\(bu" 4
 742 Since Perl recognizes a Unicode Byte Order Mark at the start of files
 743 as signaling that the file is Unicode encoded as in \s-1UTF\-16\s0 (whether
 744 big-endian or little\-endian) or \s-1UTF\-8\s0, Pod parsers should do the
 745 same.  Otherwise, the character encoding should be understood as
 746 being \s-1UTF\-8\s0 if the first highbit byte sequence in the file seems
 747 valid as a \s-1UTF\-8\s0 sequence, or otherwise as Latin\-1.
 748 .Sp
 749 Future versions of this specification may specify
 750 how Pod can accept other encodings.  Presumably treatment of other
 751 encodings in Pod parsing would be as in \s-1XML\s0 parsing: whatever the
 752 encoding declared by a particular Pod file, content is to be
 753 stored in memory as Unicode characters.
 754 .IP "\(bu" 4
 755 The well known Unicode Byte Order Marks are as follows:  if the
 756 file begins with the two literal byte values 0xFE 0xFF, this is
 757 the \s-1BOM\s0 for big-endian \s-1UTF\-16\s0.  If the file begins with the two
 758 literal byte values 0xFF 0xFE, this is the \s-1BOM\s0 for little-endian
 759 \&\s-1UTF\-16\s0.  If the file begins with the three literal byte values
 760 0xEF 0xBB 0xBF, this is the \s-1BOM\s0 for \s-1UTF\-8\s0.
 761 .IP "\(bu" 4
 762 A naive but sufficient heuristic for testing the first highbit
 763 byte-sequence in a BOM-less file (whether in code or in Pod!), to see
 764 whether that sequence is valid as \s-1UTF\-8\s0 (\s-1RFC\s0 2279) is to check whether
 765 the first byte in the sequence is in the range 0xC0 \- 0xFD
 766 \&\fIand\fR whether the next byte is in the range
 767 0x80 \- 0xBF.  If so, the parser may conclude that this file is in
 768 \&\s-1UTF\-8\s0, and all highbit sequences in the file should be assumed to
 769 be \s-1UTF\-8\s0.  Otherwise the parser should treat the file as being
 770 in Latin\-1.  In the unlikely circumstance that the first highbit
 771 sequence in a truly non\-UTF\-8 file happens to appear to be \s-1UTF\-8\s0, one
 772 can cater to our heuristic (as well as any more intelligent heuristic)
 773 by prefacing that line with a comment line containing a highbit
 774 sequence that is clearly \fInot\fR valid as \s-1UTF\-8\s0.  A line consisting
 775 of simply \*(L"#\*(R", an e\-acute, and any non-highbit byte,
 776 is sufficient to establish this file's encoding.
 777 .IP "\(bu" 4
 778 This document's requirements and suggestions about encodings
 779 do not apply to Pod processors running on non-ASCII platforms,
 780 notably \s-1EBCDIC\s0 platforms.
 781 .IP "\(bu" 4
 782 Pod processors must treat a \*(L"=for [label] [content...]\*(R" paragraph as
 783 meaning the same thing as a \*(L"=begin [label]\*(R" paragraph, content, and
 784 an \*(L"=end [label]\*(R" paragraph.  (The parser may conflate these two
 785 constructs, or may leave them distinct, in the expectation that the
 786 formatter will nevertheless treat them the same.)
 787 .IP "\(bu" 4
 788 When rendering Pod to a format that allows comments (i.e., to nearly
 789 any format other than plaintext), a Pod formatter must insert comment
 790 text identifying its name and version number, and the name and
 791 version numbers of any modules it might be using to process the Pod.
 792 Minimal examples:
 793 .Sp
 794 .Vb 1
 795 \&  %% POD::Pod2PS v3.14159, using POD::Parser v1.92
 796 .Ve
 797 .Sp
 798 .Vb 1
 799 \&  <!\-\- Pod::HTML v3.14159, using POD::Parser v1.92 \-\->
 800 .Ve
 801 .Sp
 802 .Vb 1
 803 \&  {\edoccomm generated by Pod::Tree::RTF 3.14159 using Pod::Tree 1.08}
 804 .Ve
 805 .Sp
 806 .Vb 1
 807 \&  .\e" Pod::Man version 3.14159, using POD::Parser version 1.92
 808 .Ve
 809 .Sp
 810 Formatters may also insert additional comments, including: the
 811 release date of the Pod formatter program, the contact address for
 812 the author(s) of the formatter, the current time, the name of input
 813 file, the formatting options in effect, version of Perl used, etc.
 814 .Sp
 815 Formatters may also choose to note errors/warnings as comments,
 816 besides or instead of emitting them otherwise (as in messages to
 817 \&\s-1STDERR\s0, or \f(CW\*(C`die\*(C'\fRing).
 818 .IP "\(bu" 4
 819 Pod parsers \fImay\fR emit warnings or error messages ("Unknown E code
 820 E<zslig>!") to \s-1STDERR\s0 (whether through printing to \s-1STDERR\s0, or
 821 \&\f(CW\*(C`warn\*(C'\fRing/\f(CW\*(C`carp\*(C'\fRing, or \f(CW\*(C`die\*(C'\fRing/\f(CW\*(C`croak\*(C'\fRing), but \fImust\fR allow
 822 suppressing all such \s-1STDERR\s0 output, and instead allow an option for
 823 reporting errors/warnings
 824 in some other way, whether by triggering a callback, or noting errors
 825 in some attribute of the document object, or some similarly unobtrusive
 826 mechanism \*(-- or even by appending a \*(L"Pod Errors\*(R" section to the end of
 827 the parsed form of the document.
 828 .IP "\(bu" 4
 829 In cases of exceptionally aberrant documents, Pod parsers may abort the
 830 parse.  Even then, using \f(CW\*(C`die\*(C'\fRing/\f(CW\*(C`croak\*(C'\fRing is to be avoided; where
 831 possible, the parser library may simply close the input file
 832 and add text like \*(L"*** Formatting Aborted ***\*(R" to the end of the
 833 (partial) in-memory document.
 834 .IP "\(bu" 4
 835 In paragraphs where formatting codes (like E<...>, B<...>)
 836 are understood (i.e., \fInot\fR verbatim paragraphs, but \fIincluding\fR
 837 ordinary paragraphs, and command paragraphs that produce renderable
 838 text, like \*(L"=head1\*(R"), literal whitespace should generally be considered
 839 \&\*(L"insignificant\*(R", in that one literal space has the same meaning as any
 840 (nonzero) number of literal spaces, literal newlines, and literal tabs
 841 (as long as this produces no blank lines, since those would terminate
 842 the paragraph).  Pod parsers should compact literal whitespace in each
 843 processed paragraph, but may provide an option for overriding this
 844 (since some processing tasks do not require it), or may follow
 845 additional special rules (for example, specially treating
 846 period-space-space or period-newline sequences).
 847 .IP "\(bu" 4
 848 Pod parsers should not, by default, try to coerce apostrophe (') and
 849 quote (") into smart quotes (little 9's, 66's, 99's, etc), nor try to
 850 turn backtick (`) into anything else but a single backtick character
 851 (distinct from an open quote character!), nor \*(L"\-\-\*(R" into anything but
 852 two minus signs.  They \fImust never\fR do any of those things to text
 853 in C<...> formatting codes, and never \fIever\fR to text in verbatim
 854 paragraphs.
 855 .IP "\(bu" 4
 856 When rendering Pod to a format that has two kinds of hyphens (\-), one
 857 that's a non-breaking hyphen, and another that's a breakable hyphen
 858 (as in \*(L"object\-oriented\*(R", which can be split across lines as
 859 \&\*(L"object\-\*(R", newline, \*(L"oriented\*(R"), formatters are encouraged to
 860 generally translate \*(L"\-\*(R" to non-breaking hyphen, but may apply
 861 heuristics to convert some of these to breaking hyphens.
 862 .IP "\(bu" 4
 863 Pod formatters should make reasonable efforts to keep words of Perl
 864 code from being broken across lines.  For example, \*(L"Foo::Bar\*(R" in some
 865 formatting systems is seen as eligible for being broken across lines
 866 as \*(L"Foo::\*(R" newline \*(L"Bar\*(R" or even \*(L"Foo::\-\*(R" newline \*(L"Bar\*(R".  This should
 867 be avoided where possible, either by disabling all line-breaking in
 868 mid\-word, or by wrapping particular words with internal punctuation
 869 in \*(L"don't break this across lines\*(R" codes (which in some formats may
 870 not be a single code, but might be a matter of inserting non-breaking
 871 zero-width spaces between every pair of characters in a word.)
 872 .IP "\(bu" 4
 873 Pod parsers should, by default, expand tabs in verbatim paragraphs as
 874 they are processed, before passing them to the formatter or other
 875 processor.  Parsers may also allow an option for overriding this.
 876 .IP "\(bu" 4
 877 Pod parsers should, by default, remove newlines from the end of
 878 ordinary and verbatim paragraphs before passing them to the
 879 formatter.  For example, while the paragraph you're reading now
 880 could be considered, in Pod source, to end with (and contain)
 881 the newline(s) that end it, it should be processed as ending with
 882 (and containing) the period character that ends this sentence.
 883 .IP "\(bu" 4
 884 Pod parsers, when reporting errors, should make some effort to report
 885 an approximate line number (\*(L"Nested E<>'s in Paragraph #52, near
 886 line 633 of Thing/Foo.pm!\*(R"), instead of merely noting the paragraph
 887 number (\*(L"Nested E<>'s in Paragraph #52 of Thing/Foo.pm!\*(R").  Where
 888 this is problematic, the paragraph number should at least be
 889 accompanied by an excerpt from the paragraph (\*(L"Nested E<>'s in
 890 Paragraph #52 of Thing/Foo.pm, which begins 'Read/write accessor for
 891 the C<interest rate> attribute...'\*(R").
 892 .IP "\(bu" 4
 893 Pod parsers, when processing a series of verbatim paragraphs one
 894 after another, should consider them to be one large verbatim
 895 paragraph that happens to contain blank lines.  I.e., these two
 896 lines, which have a blank line between them:
 897 .Sp
 898 .Vb 1
 899 \&        use Foo;
 900 .Ve
 901 .Sp
 902 .Vb 1
 903 \&        print Foo\->VERSION
 904 .Ve
 905 .Sp
 906 should be unified into one paragraph (\*(L"\etuse Foo;\en\en\etprint
 907 Foo\->\s-1VERSION\s0\*(R") before being passed to the formatter or other
 908 processor.  Parsers may also allow an option for overriding this.
 909 .Sp
 910 While this might be too cumbersome to implement in event-based Pod
 911 parsers, it is straightforward for parsers that return parse trees.
 912 .IP "\(bu" 4
 913 Pod formatters, where feasible, are advised to avoid splitting short
 914 verbatim paragraphs (under twelve lines, say) across pages.
 915 .IP "\(bu" 4
 916 Pod parsers must treat a line with only spaces and/or tabs on it as a
 917 \&\*(L"blank line\*(R" such as separates paragraphs.  (Some older parsers
 918 recognized only two adjacent newlines as a \*(L"blank line\*(R" but would not
 919 recognize a newline, a space, and a newline, as a blank line.  This
 920 is noncompliant behavior.)
 921 .IP "\(bu" 4
 922 Authors of Pod formatters/processors should make every effort to
 923 avoid writing their own Pod parser.  There are already several in
 924 \&\s-1CPAN\s0, with a wide range of interface styles \*(-- and one of them,
 925 Pod::Parser, comes with modern versions of Perl.
 926 .IP "\(bu" 4
 927 Characters in Pod documents may be conveyed either as literals, or by
 928 number in E<n> codes, or by an equivalent mnemonic, as in
 929 E<eacute> which is exactly equivalent to E<233>.
 930 .Sp
 931 Characters in the range 32\-126 refer to those well known US-ASCII
 932 characters (also defined there by Unicode, with the same meaning),
 933 which all Pod formatters must render faithfully.  Characters
 934 in the ranges 0\-31 and 127\-159 should not be used (neither as
 935 literals, nor as E<number> codes), except for the
 936 literal byte-sequences for newline (13, 13 10, or 10), and tab (9).
 937 .Sp
 938 Characters in the range 160\-255 refer to Latin\-1 characters (also
 939 defined there by Unicode, with the same meaning).  Characters above
 940 255 should be understood to refer to Unicode characters.
 941 .IP "\(bu" 4
 942 Be warned
 943 that some formatters cannot reliably render characters outside 32\-126;
 944 and many are able to handle 32\-126 and 160\-255, but nothing above
 945 255.
 946 .IP "\(bu" 4
 947 Besides the well-known \*(L"E<lt>\*(R" and \*(L"E<gt>\*(R" codes for
 948 less-than and greater\-than, Pod parsers must understand \*(L"E<sol>\*(R"
 949 for \*(L"/\*(R" (solidus, slash), and \*(L"E<verbar>\*(R" for \*(L"|\*(R" (vertical bar,
 950 pipe).  Pod parsers should also understand \*(L"E<lchevron>\*(R" and
 951 \&\*(L"E<rchevron>\*(R" as legacy codes for characters 171 and 187, i.e.,
 952 \&\*(L"left\-pointing double angle quotation mark\*(R" = \*(L"left pointing
 953 guillemet\*(R" and \*(L"right\-pointing double angle quotation mark\*(R" = \*(L"right
 954 pointing guillemet\*(R".  (These look like little \*(L"<<\*(R" and \*(L">>\*(R", and they
 955 are now preferably expressed with the \s-1HTML/XHTML\s0 codes \*(L"E<laquo>\*(R"
 956 and \*(L"E<raquo>\*(R".)
 957 .IP "\(bu" 4
 958 Pod parsers should understand all "E<html>" codes as defined
 959 in the entity declarations in the most recent \s-1XHTML\s0 specification at
 960 \&\f(CW\*(C`www.W3.org\*(C'\fR.  Pod parsers must understand at least the entities
 961 that define characters in the range 160\-255 (Latin\-1).  Pod parsers,
 962 when faced with some unknown "E<\fIidentifier\fR>" code,
 963 shouldn't simply replace it with nullstring (by default, at least),
 964 but may pass it through as a string consisting of the literal characters
 965 E, less\-than, \fIidentifier\fR, greater\-than.  Or Pod parsers may offer the
 966 alternative option of processing such unknown
 967 \&\*(L"E<\fIidentifier\fR>\*(R" codes by firing an event especially
 968 for such codes, or by adding a special node-type to the in-memory
 969 document tree.  Such \*(L"E<\fIidentifier\fR>\*(R" may have special meaning
 970 to some processors, or some processors may choose to add them to
 971 a special error report.
 972 .IP "\(bu" 4
 973 Pod parsers must also support the \s-1XHTML\s0 codes \*(L"E<quot>\*(R" for
 974 character 34 (doublequote, "), \*(L"E<amp>\*(R" for character 38
 975 (ampersand, &), and \*(L"E<apos>\*(R" for character 39 (apostrophe, ').
 976 .IP "\(bu" 4
 977 Note that in all cases of "E<whatever>", \fIwhatever\fR (whether
 978 an htmlname, or a number in any base) must consist only of
 979 alphanumeric characters \*(-- that is, \fIwhatever\fR must match
 980 \&\f(CW\*(C`m/\eA\ew+\ez/\*(C'\fR.  So "E< 0 1 2 3 >" is invalid, because
 981 it contains spaces, which aren't alphanumeric characters.  This
 982 presumably does not \fIneed\fR special treatment by a Pod processor;
 983 \&\*(L" 0 1 2 3 \*(R" doesn't look like a number in any base, so it would
 984 presumably be looked up in the table of HTML-like names.  Since
 985 there isn't (and cannot be) an HTML-like entity called \*(L" 0 1 2 3 \*(R",
 986 this will be treated as an error.  However, Pod processors may
 987 treat \*(L"E< 0 1 2 3 >\*(R" or \*(L"E<e\-acute>\*(R" as \fIsyntactically\fR
 988 invalid, potentially earning a different error message than the
 989 error message (or warning, or event) generated by a merely unknown
 990 (but theoretically valid) htmlname, as in "E<qacute>"
 991 [sic].  However, Pod parsers are not required to make this
 992 distinction.
 993 .IP "\(bu" 4
 994 Note that E<number> \fImust not\fR be interpreted as simply
 995 \&\*(L"codepoint \fInumber\fR in the current/native character set\*(R".  It always
 996 means only \*(L"the character represented by codepoint \fInumber\fR in
 997 Unicode.\*(R"  (This is identical to the semantics of &#\fInumber\fR; in \s-1XML\s0.)
 998 .Sp
 999 This will likely require many formatters to have tables mapping from
1000 treatable Unicode codepoints (such as the \*(L"\exE9\*(R" for the e\-acute
1001 character) to the escape sequences or codes necessary for conveying
1002 such sequences in the target output format.  A converter to *roff
1003 would, for example, know that \*(L"\exE9\*(R" (whether conveyed literally, or via
1004 a E<...> sequence) is to be conveyed as \*(L"e\e\e*'\*(R".
1005 Similarly, a program rendering Pod in a Mac \s-1OS\s0 application window, would
1006 presumably need to know that \*(L"\exE9\*(R" maps to codepoint 142 in MacRoman
1007 encoding that (at time of writing) is native for Mac \s-1OS\s0.  Such
1008 Unicode2whatever mappings are presumably already widely available for
1009 common output formats.  (Such mappings may be incomplete!  Implementers
1010 are not expected to bend over backwards in an attempt to render
1011 Cherokee syllabics, Etruscan runes, Byzantine musical symbols, or any
1012 of the other weird things that Unicode can encode.)  And
1013 if a Pod document uses a character not found in such a mapping, the
1014 formatter should consider it an unrenderable character.
1015 .IP "\(bu" 4
1016 If, surprisingly, the implementor of a Pod formatter can't find a
1017 satisfactory pre-existing table mapping from Unicode characters to
1018 escapes in the target format (e.g., a decent table of Unicode
1019 characters to *roff escapes), it will be necessary to build such a
1020 table.  If you are in this circumstance, you should begin with the
1021 characters in the range 0x00A0 \- 0x00FF, which is mostly the heavily
1022 used accented characters.  Then proceed (as patience permits and
1023 fastidiousness compels) through the characters that the (X)HTML
1024 standards groups judged important enough to merit mnemonics
1025 for.  These are declared in the (X)HTML specifications at the
1026 www.W3.org site.  At time of writing (September 2001), the most recent
1027 entity declaration files are:
1028 .Sp
1029 .Vb 3
1030 \&  http://www.w3.org/TR/xhtml1/DTD/xhtml\-lat1.ent
1031 \&  http://www.w3.org/TR/xhtml1/DTD/xhtml\-special.ent
1032 \&  http://www.w3.org/TR/xhtml1/DTD/xhtml\-symbol.ent
1033 .Ve
1034 .Sp
1035 Then you can progress through any remaining notable Unicode characters
1036 in the range 0x2000\-0x204D (consult the character tables at
1037 www.unicode.org), and whatever else strikes your fancy.  For example,
1038 in \fIxhtml\-symbol.ent\fR, there is the entry:
1039 .Sp
1040 .Vb 1
1041 \&  <!ENTITY infin    "&#8734;"> <!\-\- infinity, U+221E ISOtech \-\->
1042 .Ve
1043 .Sp
1044 While the mapping \*(L"infin\*(R" to the character \*(L"\ex{221E}\*(R" will (hopefully)
1045 have been already handled by the Pod parser, the presence of the
1046 character in this file means that it's reasonably important enough to
1047 include in a formatter's table that maps from notable Unicode characters
1048 to the codes necessary for rendering them.  So for a Unicode\-to\-*roff
1049 mapping, for example, this would merit the entry:
1050 .Sp
1051 .Vb 1
1052 \&  "\ex{221E}" => '\e(in',
1053 .Ve
1054 .Sp
1055 It is eagerly hoped that in the future, increasing numbers of formats
1056 (and formatters) will support Unicode characters directly (as (X)HTML
1057 does with \f(CW\*(C`&infin;\*(C'\fR, \f(CW\*(C`&#8734;\*(C'\fR, or \f(CW\*(C`&#x221E;\*(C'\fR), reducing the need
1058 for idiosyncratic mappings of Unicode\-to\-\fImy_escapes\fR.
1059 .IP "\(bu" 4
1060 It is up to individual Pod formatters to display good judgement when
1061 confronted with an unrenderable character (which is distinct from an
1062 unknown E<thing> sequence that the parser couldn't resolve to
1063 anything, renderable or not).  It is good practice to map Latin letters
1064 with diacritics (like \*(L"E<eacute>\*(R"/\*(L"E<233>\*(R") to the corresponding
1065 unaccented US-ASCII letters (like a simple character 101, \*(L"e\*(R"), but
1066 clearly this is often not feasible, and an unrenderable character may
1067 be represented as \*(L"?\*(R", or the like.  In attempting a sane fallback
1068 (as from E<233> to \*(L"e\*(R"), Pod formatters may use the
1069 \&\f(CW%Latin1Code_to_fallback\fR table in Pod::Escapes, or
1070 Text::Unidecode, if available.
1071 .Sp
1072 For example, this Pod text:
1073 .Sp
1074 .Vb 1
1075 \&  magic is enabled if you set C<$Currency> to 'E<euro>'.
1076 .Ve
1077 .Sp
1078 may be rendered as:
1079 \&\*(L"magic is enabled if you set \f(CW$Currency\fR to '\fI?\fR'\*(R" or as
1080 \&\*(L"magic is enabled if you set \f(CW$Currency\fR to '\fB[euro]\fR'\*(R", or as
1081 \&\*(L"magic is enabled if you set \f(CW$Currency\fR to '[x20AC]'\*(R", etc.
1082 .Sp
1083 A Pod formatter may also note, in a comment or warning, a list of what
1084 unrenderable characters were encountered.
1085 .IP "\(bu" 4
1086 E<...> may freely appear in any formatting code (other than
1087 in another E<...> or in a Z<>).  That is, \*(L"X<The
1088 E<euro>1,000,000 Solution>\*(R" is valid, as is \*(L"L<The
1089 E<euro>1,000,000 Solution|Million::Euros>\*(R".
1090 .IP "\(bu" 4
1091 Some Pod formatters output to formats that implement non-breaking
1092 spaces as an individual character (which I'll call \*(L"\s-1NBSP\s0\*(R"), and
1093 others output to formats that implement non-breaking spaces just as
1094 spaces wrapped in a \*(L"don't break this across lines\*(R" code.  Note that
1095 at the level of Pod, both sorts of codes can occur: Pod can contain a
1096 \&\s-1NBSP\s0 character (whether as a literal, or as a \*(L"E<160>\*(R" or
1097 \&\*(L"E<nbsp>\*(R" code); and Pod can contain \*(L"S<foo
1098 I<bar> baz>\*(R" codes, where \*(L"mere spaces\*(R" (character 32) in
1099 such codes are taken to represent non-breaking spaces.  Pod
1100 parsers should consider supporting the optional parsing of \*(L"S<foo
1101 I<bar> baz>\*(R" as if it were
1102 \&\*(L"foo\fI\s-1NBSP\s0\fRI<bar>\fI\s-1NBSP\s0\fRbaz\*(R", and, going the other way, the
1103 optional parsing of groups of words joined by \s-1NBSP\s0's as if each group
1104 were in a S<...> code, so that formatters may use the
1105 representation that maps best to what the output format demands.
1106 .IP "\(bu" 4
1107 Some processors may find that the \f(CW\*(C`S<...>\*(C'\fR code is easiest to
1108 implement by replacing each space in the parse tree under the content
1109 of the S, with an \s-1NBSP\s0.  But note: the replacement should apply \fInot\fR to
1110 spaces in \fIall\fR text, but \fIonly\fR to spaces in \fIprintable\fR text.  (This
1111 distinction may or may not be evident in the particular tree/event
1112 model implemented by the Pod parser.)  For example, consider this
1113 unusual case:
1114 .Sp
1115 .Vb 1
1116 \&   S<L</Autoloaded Functions>>
1117 .Ve
1118 .Sp
1119 This means that the space in the middle of the visible link text must
1120 not be broken across lines.  In other words, it's the same as this:
1121 .Sp
1122 .Vb 1
1123 \&   L<"AutoloadedE<160>Functions"/Autoloaded Functions>
1124 .Ve
1125 .Sp
1126 However, a misapplied space-to-NBSP replacement could (wrongly)
1127 produce something equivalent to this:
1128 .Sp
1129 .Vb 1
1130 \&   L<"AutoloadedE<160>Functions"/AutoloadedE<160>Functions>
1131 .Ve
1132 .Sp
1133 \&...which is almost definitely not going to work as a hyperlink (assuming
1134 this formatter outputs a format supporting hypertext).
1135 .Sp
1136 Formatters may choose to just not support the S format code,
1137 especially in cases where the output format simply has no \s-1NBSP\s0
1138 character/code and no code for \*(L"don't break this stuff across lines\*(R".
1139 .IP "\(bu" 4
1140 Besides the \s-1NBSP\s0 character discussed above, implementors are reminded
1141 of the existence of the other \*(L"special\*(R" character in Latin\-1, the
1142 \&\*(L"soft hyphen\*(R" character, also known as \*(L"discretionary hyphen\*(R",
1143 i.e. \f(CW\*(C`E<173>\*(C'\fR = \f(CW\*(C`E<0xAD>\*(C'\fR =
1144 \&\f(CW\*(C`E<shy>\*(C'\fR.  This character expresses an optional hyphenation
1145 point.  That is, it normally renders as nothing, but may render as a
1146 \&\*(L"\-\*(R" if a formatter breaks the word at that point.  Pod formatters
1147 should, as appropriate, do one of the following:  1) render this with
1148 a code with the same meaning (e.g., \*(L"\e\-\*(R" in \s-1RTF\s0), 2) pass it through
1149 in the expectation that the formatter understands this character as
1150 such, or 3) delete it.
1151 .Sp
1152 For example:
1153 .Sp
1154 .Vb 3
1155 \&  sigE<shy>action
1156 \&  manuE<shy>script
1157 \&  JarkE<shy>ko HieE<shy>taE<shy>nieE<shy>mi
1158 .Ve
1159 .Sp
1160 These signal to a formatter that if it is to hyphenate \*(L"sigaction\*(R"
1161 or \*(L"manuscript\*(R", then it should be done as
1162 \&\*(L"sig\-\fI[linebreak]\fRaction\*(R" or \*(L"manu\-\fI[linebreak]\fRscript\*(R"
1163 (and if it doesn't hyphenate it, then the \f(CW\*(C`E<shy>\*(C'\fR doesn't
1164 show up at all).  And if it is
1165 to hyphenate \*(L"Jarkko\*(R" and/or \*(L"Hietaniemi\*(R", it can do
1166 so only at the points where there is a \f(CW\*(C`E<shy>\*(C'\fR code.
1167 .Sp
1168 In practice, it is anticipated that this character will not be used
1169 often, but formatters should either support it, or delete it.
1170 .IP "\(bu" 4
1171 If you think that you want to add a new command to Pod (like, say, a
1172 \&\*(L"=biblio\*(R" command), consider whether you could get the same
1173 effect with a for or begin/end sequence: \*(L"=for biblio ...\*(R" or \*(L"=begin
1174 biblio\*(R" ... \*(L"=end biblio\*(R".  Pod processors that don't understand
1175 \&\*(L"=for biblio\*(R", etc, will simply ignore it, whereas they may complain
1176 loudly if they see \*(L"=biblio\*(R".
1177 .IP "\(bu" 4
1178 Throughout this document, \*(L"Pod\*(R" has been the preferred spelling for
1179 the name of the documentation format.  One may also use \*(L"\s-1POD\s0\*(R" or
1180 \&\*(L"pod\*(R".  For the documentation that is (typically) in the Pod
1181 format, you may use \*(L"pod\*(R", or \*(L"Pod\*(R", or \*(L"\s-1POD\s0\*(R".  Understanding these
1182 distinctions is useful; but obsessing over how to spell them, usually
1183 is not.
1184 .SH "About L<...> Codes"
1185 .IX Header "About L<...> Codes"
1186 As you can tell from a glance at perlpod, the L<...>
1187 code is the most complex of the Pod formatting codes.  The points below
1188 will hopefully clarify what it means and how processors should deal
1189 with it.
1190 .IP "\(bu" 4
1191 In parsing an L<...> code, Pod parsers must distinguish at least
1192 four attributes:
1193 .RS 4
1194 .IP "First:" 4
1195 .IX Item "First:"
1196 The link\-text.  If there is none, this must be undef.  (E.g., in
1197 \&\*(L"L<Perl Functions|perlfunc>\*(R", the link-text is \*(L"Perl Functions\*(R".
1198 In \*(L"L<Time::HiRes>\*(R" and even \*(L"L<|Time::HiRes>\*(R", there is no
1199 link text.  Note that link text may contain formatting.)
1200 .IP "Second:" 4
1201 .IX Item "Second:"
1202 The possibly inferred link-text \*(-- i.e., if there was no real link
1203 text, then this is the text that we'll infer in its place.  (E.g., for
1204 \&\*(L"L<Getopt::Std>\*(R", the inferred link text is \*(L"Getopt::Std\*(R".)
1205 .IP "Third:" 4
1206 .IX Item "Third:"
1207 The name or \s-1URL\s0, or undef if none.  (E.g., in \*(L"L<Perl
1208 Functions|perlfunc>\*(R", the name \*(-- also sometimes called the page \*(--
1209 is \*(L"perlfunc\*(R".  In \*(L"L</CAVEATS>\*(R", the name is undef.)
1210 .IP "Fourth:" 4
1211 .IX Item "Fourth:"
1212 The section (\s-1AKA\s0 \*(L"item\*(R" in older perlpods), or undef if none.  E.g.,
1213 in \*(L"L<Getopt::Std/DESCRIPTION>\*(R", \*(L"\s-1DESCRIPTION\s0\*(R" is the section.  (Note
1214 that this is not the same as a manpage section like the \*(L"5\*(R" in \*(L"man 5
1215 crontab\*(R".  \*(L"Section Foo\*(R" in the Pod sense means the part of the text
1216 that's introduced by the heading or item whose text is \*(L"Foo\*(R".)
1217 .RE
1218 .RS 4
1219 .Sp
1220 Pod parsers may also note additional attributes including:
1221 .IP "Fifth:" 4
1222 .IX Item "Fifth:"
1223 A flag for whether item 3 (if present) is a \s-1URL\s0 (like
1224 \&\*(L"http://lists.perl.org\*(R" is), in which case there should be no section
1225 attribute; a Pod name (like \*(L"perldoc\*(R" and \*(L"Getopt::Std\*(R" are); or
1226 possibly a man page name (like \*(L"\fIcrontab\fR\|(5)\*(R" is).
1227 .IP "Sixth:" 4
1228 .IX Item "Sixth:"
1229 The raw original L<...> content, before text is split on
1230 \&\*(L"|\*(R", \*(L"/\*(R", etc, and before E<...> codes are expanded.
1231 .RE
1232 .RS 4
1233 .Sp
1234 (The above were numbered only for concise reference below.  It is not
1235 a requirement that these be passed as an actual list or array.)
1236 .Sp
1237 For example:
1238 .Sp
1239 .Vb 7
1240 \&  L<Foo::Bar>
1241 \&    =>  undef,                          # link text
1242 \&        "Foo::Bar",                     # possibly inferred link text
1243 \&        "Foo::Bar",                     # name
1244 \&        undef,                          # section
1245 \&        'pod',                          # what sort of link
1246 \&        "Foo::Bar"                      # original content
1247 .Ve
1248 .Sp
1249 .Vb 7
1250 \&  L<Perlport's section on NL's|perlport/Newlines>
1251 \&    =>  "Perlport's section on NL's",   # link text
1252 \&        "Perlport's section on NL's",   # possibly inferred link text
1253 \&        "perlport",                     # name
1254 \&        "Newlines",                     # section
1255 \&        'pod',                          # what sort of link
1256 \&        "Perlport's section on NL's|perlport/Newlines" # orig. content
1257 .Ve
1258 .Sp
1259 .Vb 7
1260 \&  L<perlport/Newlines>
1261 \&    =>  undef,                          # link text
1262 \&        '"Newlines" in perlport',       # possibly inferred link text
1263 \&        "perlport",                     # name
1264 \&        "Newlines",                     # section
1265 \&        'pod',                          # what sort of link
1266 \&        "perlport/Newlines"             # original content
1267 .Ve
1268 .Sp
1269 .Vb 7
1270 \&  L<crontab(5)/"DESCRIPTION">
1271 \&    =>  undef,                          # link text
1272 \&        '"DESCRIPTION" in crontab(5)',  # possibly inferred link text
1273 \&        "crontab(5)",                   # name
1274 \&        "DESCRIPTION",                  # section
1275 \&        'man',                          # what sort of link
1276 \&        'crontab(5)/"DESCRIPTION"'      # original content
1277 .Ve
1278 .Sp
1279 .Vb 7
1280 \&  L</Object Attributes>
1281 \&    =>  undef,                          # link text
1282 \&        '"Object Attributes"',          # possibly inferred link text
1283 \&        undef,                          # name
1284 \&        "Object Attributes",            # section
1285 \&        'pod',                          # what sort of link
1286 \&        "/Object Attributes"            # original content
1287 .Ve
1288 .Sp
1289 .Vb 7
1290 \&  L<http://www.perl.org/>
1291 \&    =>  undef,                          # link text
1292 \&        "http://www.perl.org/",         # possibly inferred link text
1293 \&        "http://www.perl.org/",         # name
1294 \&        undef,                          # section
1295 \&        'url',                          # what sort of link
1296 \&        "http://www.perl.org/"          # original content
1297 .Ve
1298 .Sp
1299 Note that you can distinguish URL-links from anything else by the
1300 fact that they match \f(CW\*(C`m/\eA\ew+:[^:\es]\eS*\ez/\*(C'\fR.  So
1301 \&\f(CW\*(C`L<http://www.perl.com>\*(C'\fR is a \s-1URL\s0, but
1302 \&\f(CW\*(C`L<HTTP::Response>\*(C'\fR isn't.
1303 .RE
1304 .IP "\(bu" 4
1305 In case of L<...> codes with no \*(L"text|\*(R" part in them,
1306 older formatters have exhibited great variation in actually displaying
1307 the link or cross reference.  For example, L<\fIcrontab\fR\|(5)> would render
1308 as \*(L"the \f(CWcrontab(5)\fR manpage\*(R", or \*(L"in the \f(CWcrontab(5)\fR manpage\*(R"
1309 or just \*(L"\f(CWcrontab(5)\fR\*(R".
1310 .Sp
1311 Pod processors must now treat \*(L"text|\*(R"\-less links as follows:
1312 .Sp
1313 .Vb 3
1314 \&  L<name>         =>  L<name|name>
1315 \&  L</section>     =>  L<"section"|/section>
1316 \&  L<name/section> =>  L<"section" in name|name/section>
1317 .Ve
1318 .IP "\(bu" 4
1319 Note that section names might contain markup.  I.e., if a section
1320 starts with:
1321 .Sp
1322 .Vb 1
1323 \&  =head2 About the C<\-M> Operator
1324 .Ve
1325 .Sp
1326 or with:
1327 .Sp
1328 .Vb 1
1329 \&  =item About the C<\-M> Operator
1330 .Ve
1331 .Sp
1332 then a link to it would look like this:
1333 .Sp
1334 .Vb 1
1335 \&  L<somedoc/About the C<\-M> Operator>
1336 .Ve
1337 .Sp
1338 Formatters may choose to ignore the markup for purposes of resolving
1339 the link and use only the renderable characters in the section name,
1340 as in:
1341 .Sp
1342 .Vb 2
1343 \&  <h1><a name="About_the_\-M_Operator">About the <code>\-M</code>
1344 \&  Operator</h1>
1345 .Ve
1346 .Sp
1347 .Vb 1
1348 \&  ...
1349 .Ve
1350 .Sp
1351 .Vb 2
1352 \&  <a href="somedoc#About_the_\-M_Operator">About the <code>\-M</code>
1353 \&  Operator" in somedoc</a>
1354 .Ve
1355 .IP "\(bu" 4
1356 Previous versions of perlpod distinguished \f(CW\*(C`L<name/"section">\*(C'\fR
1357 links from \f(CW\*(C`L<name/item>\*(C'\fR links (and their targets).  These
1358 have been merged syntactically and semantically in the current
1359 specification, and \fIsection\fR can refer either to a \*(L"=head\fIn\fR Heading
1360 Content\*(R" command or to a \*(L"=item Item Content\*(R" command.  This
1361 specification does not specify what behavior should be in the case
1362 of a given document having several things all seeming to produce the
1363 same \fIsection\fR identifier (e.g., in \s-1HTML\s0, several things all producing
1364 the same \fIanchorname\fR in <a name="\fIanchorname\fR">...</a>
1365 elements).  Where Pod processors can control this behavior, they should
1366 use the first such anchor.  That is, \f(CW\*(C`L<Foo/Bar>\*(C'\fR refers to the
1367 \&\fIfirst\fR \*(L"Bar\*(R" section in Foo.
1368 .Sp
1369 But for some processors/formats this cannot be easily controlled; as
1370 with the \s-1HTML\s0 example, the behavior of multiple ambiguous
1371 <a name="\fIanchorname\fR">...</a> is most easily just left up to
1372 browsers to decide.
1373 .IP "\(bu" 4
1374 Authors wanting to link to a particular (absolute) \s-1URL\s0, must do so
1375 only with "L<scheme:...>" codes (like
1376 L<http://www.perl.org>), and must not attempt "L<Some Site
1377 Name|scheme:...>" codes.  This restriction avoids many problems
1378 in parsing and rendering L<...> codes.
1379 .IP "\(bu" 4
1380 In a \f(CW\*(C`L<text|...>\*(C'\fR code, text may contain formatting codes
1381 for formatting or for E<...> escapes, as in:
1382 .Sp
1383 .Vb 1
1384 \&  L<B<ummE<234>stuff>|...>
1385 .Ve
1386 .Sp
1387 For \f(CW\*(C`L<...>\*(C'\fR codes without a \*(L"name|\*(R" part, only
1388 \&\f(CW\*(C`E<...>\*(C'\fR and \f(CW\*(C`Z<>\*(C'\fR codes may occur \*(-- no
1389 other formatting codes.  That is, authors should not use
1390 "\f(CW\*(C`L<B<Foo::Bar>>\*(C'\fR".
1391 .Sp
1392 Note, however, that formatting codes and Z<>'s can occur in any
1393 and all parts of an L<...> (i.e., in \fIname\fR, \fIsection\fR, \fItext\fR,
1394 and \fIurl\fR).
1395 .Sp
1396 Authors must not nest L<...> codes.  For example, "L<The
1397 L<Foo::Bar> man page>" should be treated as an error.
1398 .IP "\(bu" 4
1399 Note that Pod authors may use formatting codes inside the \*(L"text\*(R"
1400 part of "L<text|name>" (and so on for L<text|/"sec">).
1401 .Sp
1402 In other words, this is valid:
1403 .Sp
1404 .Vb 1
1405 \&  Go read L<the docs on C<$.>|perlvar/"$.">
1406 .Ve
1407 .Sp
1408 Some output formats that do allow rendering "L<...>" codes as
1409 hypertext, might not allow the link-text to be formatted; in
1410 that case, formatters will have to just ignore that formatting.
1411 .IP "\(bu" 4
1412 At time of writing, \f(CW\*(C`L<name>\*(C'\fR values are of two types:
1413 either the name of a Pod page like \f(CW\*(C`L<Foo::Bar>\*(C'\fR (which
1414 might be a real Perl module or program in an \f(CW@INC\fR / \s-1PATH\s0
1415 directory, or a .pod file in those places); or the name of a \s-1UNIX\s0
1416 man page, like \f(CW\*(C`L<crontab(5)>\*(C'\fR.  In theory, \f(CW\*(C`L<chmod>\*(C'\fR
1417 is ambiguous between a Pod page called \*(L"chmod\*(R", or the Unix man page
1418 \&\*(L"chmod\*(R" (in whatever man\-section).  However, the presence of a string
1419 in parens, as in \*(L"\fIcrontab\fR\|(5)\*(R", is sufficient to signal that what
1420 is being discussed is not a Pod page, and so is presumably a
1421 \&\s-1UNIX\s0 man page.  The distinction is of no importance to many
1422 Pod processors, but some processors that render to hypertext formats
1423 may need to distinguish them in order to know how to render a
1424 given \f(CW\*(C`L<foo>\*(C'\fR code.
1425 .IP "\(bu" 4
1426 Previous versions of perlpod allowed for a \f(CW\*(C`L<section>\*(C'\fR syntax
1427 (as in \f(CW\*(C`L<Object Attributes>\*(C'\fR), which was not easily distinguishable
1428 from \f(CW\*(C`L<name>\*(C'\fR syntax.  This syntax is no longer in the
1429 specification, and has been replaced by the \f(CW\*(C`L<"section">\*(C'\fR syntax
1430 (where the quotes were formerly optional).  Pod parsers should tolerate
1431 the \f(CW\*(C`L<section>\*(C'\fR syntax, for a while at least.  The suggested
1432 heuristic for distinguishing \f(CW\*(C`L<section>\*(C'\fR from \f(CW\*(C`L<name>\*(C'\fR
1433 is that if it contains any whitespace, it's a \fIsection\fR.  Pod processors
1434 may warn about this being deprecated syntax.
1435 .SH "About =over...=back Regions"
1436 .IX Header "About =over...=back Regions"
1437 \&\*(L"=over\*(R"...\*(L"=back\*(R" regions are used for various kinds of list-like
1438 structures.  (I use the term \*(L"region\*(R" here simply as a collective
1439 term for everything from the \*(L"=over\*(R" to the matching \*(L"=back\*(R".)
1440 .IP "\(bu" 4
1441 The non-zero numeric \fIindentlevel\fR in \*(L"=over \fIindentlevel\fR\*(R" ...
1442 \&\*(L"=back\*(R" is used for giving the formatter a clue as to how many
1443 \&\*(L"spaces\*(R" (ems, or roughly equivalent units) it should tab over,
1444 although many formatters will have to convert this to an absolute
1445 measurement that may not exactly match with the size of spaces (or M's)
1446 in the document's base font.  Other formatters may have to completely
1447 ignore the number.  The lack of any explicit \fIindentlevel\fR parameter is
1448 equivalent to an \fIindentlevel\fR value of 4.  Pod processors may
1449 complain if \fIindentlevel\fR is present but is not a positive number
1450 matching \f(CW\*(C`m/\eA(\ed*\e.)?\ed+\ez/\*(C'\fR.
1451 .IP "\(bu" 4
1452 Authors of Pod formatters are reminded that \*(L"=over\*(R" ... \*(L"=back\*(R" may
1453 map to several different constructs in your output format.  For
1454 example, in converting Pod to (X)HTML, it can map to any of
1455 <ul>...</ul>, <ol>...</ol>, <dl>...</dl>, or
1456 <blockquote>...</blockquote>.  Similarly, \*(L"=item\*(R" can map to <li> or
1457 <dt>.
1458 .IP "\(bu" 4
1459 Each \*(L"=over\*(R" ... \*(L"=back\*(R" region should be one of the following:
1460 .RS 4
1461 .IP "\(bu" 4
1462 An \*(L"=over\*(R" ... \*(L"=back\*(R" region containing only \*(L"=item *\*(R" commands,
1463 each followed by some number of ordinary/verbatim paragraphs, other
1464 nested \*(L"=over\*(R" ... \*(L"=back\*(R" regions, \*(L"=for...\*(R" paragraphs, and
1465 \&\*(L"=begin\*(R"...\*(L"=end\*(R" regions.
1466 .Sp
1467 (Pod processors must tolerate a bare \*(L"=item\*(R" as if it were \*(L"=item
1468 *\*(R".)  Whether \*(L"*\*(R" is rendered as a literal asterisk, an \*(L"o\*(R", or as
1469 some kind of real bullet character, is left up to the Pod formatter,
1470 and may depend on the level of nesting.
1471 .IP "\(bu" 4
1472 An \*(L"=over\*(R" ... \*(L"=back\*(R" region containing only
1473 \&\f(CW\*(C`m/\eA=item\es+\ed+\e.?\es*\ez/\*(C'\fR paragraphs, each one (or each group of them)
1474 followed by some number of ordinary/verbatim paragraphs, other nested
1475 \&\*(L"=over\*(R" ... \*(L"=back\*(R" regions, \*(L"=for...\*(R" paragraphs, and/or
1476 \&\*(L"=begin\*(R"...\*(L"=end\*(R" regions.  Note that the numbers must start at 1
1477 in each section, and must proceed in order and without skipping
1478 numbers.
1479 .Sp
1480 (Pod processors must tolerate lines like \*(L"=item 1\*(R" as if they were
1481 \&\*(L"=item 1.\*(R", with the period.)
1482 .IP "\(bu" 4
1483 An \*(L"=over\*(R" ... \*(L"=back\*(R" region containing only \*(L"=item [text]\*(R"
1484 commands, each one (or each group of them) followed by some number of
1485 ordinary/verbatim paragraphs, other nested \*(L"=over\*(R" ... \*(L"=back\*(R"
1486 regions, or \*(L"=for...\*(R" paragraphs, and \*(L"=begin\*(R"...\*(L"=end\*(R" regions.
1487 .Sp
1488 The \*(L"=item [text]\*(R" paragraph should not match
1489 \&\f(CW\*(C`m/\eA=item\es+\ed+\e.?\es*\ez/\*(C'\fR or \f(CW\*(C`m/\eA=item\es+\e*\es*\ez/\*(C'\fR, nor should it
1490 match just \f(CW\*(C`m/\eA=item\es*\ez/\*(C'\fR.
1491 .IP "\(bu" 4
1492 An \*(L"=over\*(R" ... \*(L"=back\*(R" region containing no \*(L"=item\*(R" paragraphs at
1493 all, and containing only some number of
1494 ordinary/verbatim paragraphs, and possibly also some nested \*(L"=over\*(R"
1495 \&... \*(L"=back\*(R" regions, \*(L"=for...\*(R" paragraphs, and \*(L"=begin\*(R"...\*(L"=end\*(R"
1496 regions.  Such an itemless \*(L"=over\*(R" ... \*(L"=back\*(R" region in Pod is
1497 equivalent in meaning to a \*(L"<blockquote>...</blockquote>\*(R" element in
1498 \&\s-1HTML\s0.
1499 .RE
1500 .RS 4
1501 .Sp
1502 Note that with all the above cases, you can determine which type of
1503 \&\*(L"=over\*(R" ... \*(L"=back\*(R" you have, by examining the first (non\-\*(L"=cut\*(R",
1504 non\-\*(L"=pod\*(R") Pod paragraph after the \*(L"=over\*(R" command.
1505 .RE
1506 .IP "\(bu" 4
1507 Pod formatters \fImust\fR tolerate arbitrarily large amounts of text
1508 in the "=item \fItext...\fR" paragraph.  In practice, most such
1509 paragraphs are short, as in:
1510 .Sp
1511 .Vb 1
1512 \&  =item For cutting off our trade with all parts of the world
1513 .Ve
1514 .Sp
1515 But they may be arbitrarily long:
1516 .Sp
1517 .Vb 2
1518 \&  =item For transporting us beyond seas to be tried for pretended
1519 \&  offenses
1520 .Ve
1521 .Sp
1522 .Vb 5
1523 \&  =item He is at this time transporting large armies of foreign
1524 \&  mercenaries to complete the works of death, desolation and
1525 \&  tyranny, already begun with circumstances of cruelty and perfidy
1526 \&  scarcely paralleled in the most barbarous ages, and totally
1527 \&  unworthy the head of a civilized nation.
1528 .Ve
1529 .IP "\(bu" 4
1530 Pod processors should tolerate \*(L"=item *\*(R" / "=item \fInumber\fR" commands
1531 with no accompanying paragraph.  The middle item is an example:
1532 .Sp
1533 .Vb 1
1534 \&  =over
1535 .Ve
1536 .Sp
1537 .Vb 1
1538 \&  =item 1
1539 .Ve
1540 .Sp
1541 .Vb 1
1542 \&  Pick up dry cleaning.
1543 .Ve
1544 .Sp
1545 .Vb 1
1546 \&  =item 2
1547 .Ve
1548 .Sp
1549 .Vb 1
1550 \&  =item 3
1551 .Ve
1552 .Sp
1553 .Vb 1
1554 \&  Stop by the store.  Get Abba Zabas, Stoli, and cheap lawn chairs.
1555 .Ve
1556 .Sp
1557 .Vb 1
1558 \&  =back
1559 .Ve
1560 .IP "\(bu" 4
1561 No \*(L"=over\*(R" ... \*(L"=back\*(R" region can contain headings.  Processors may
1562 treat such a heading as an error.
1563 .IP "\(bu" 4
1564 Note that an \*(L"=over\*(R" ... \*(L"=back\*(R" region should have some
1565 content.  That is, authors should not have an empty region like this:
1566 .Sp
1567 .Vb 1
1568 \&  =over
1569 .Ve
1570 .Sp
1571 .Vb 1
1572 \&  =back
1573 .Ve
1574 .Sp
1575 Pod processors seeing such a contentless \*(L"=over\*(R" ... \*(L"=back\*(R" region,
1576 may ignore it, or may report it as an error.
1577 .IP "\(bu" 4
1578 Processors must tolerate an \*(L"=over\*(R" list that goes off the end of the
1579 document (i.e., which has no matching \*(L"=back\*(R"), but they may warn
1580 about such a list.
1581 .IP "\(bu" 4
1582 Authors of Pod formatters should note that this construct:
1583 .Sp
1584 .Vb 1
1585 \&  =item Neque
1586 .Ve
1587 .Sp
1588 .Vb 1
1589 \&  =item Porro
1590 .Ve
1591 .Sp
1592 .Vb 1
1593 \&  =item Quisquam Est
1594 .Ve
1595 .Sp
1596 .Vb 3
1597 \&  Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci
1598 \&  velit, sed quia non numquam eius modi tempora incidunt ut
1599 \&  labore et dolore magnam aliquam quaerat voluptatem.
1600 .Ve
1601 .Sp
1602 .Vb 1
1603 \&  =item Ut Enim
1604 .Ve
1605 .Sp
1606 is semantically ambiguous, in a way that makes formatting decisions
1607 a bit difficult.  On the one hand, it could be mention of an item
1608 \&\*(L"Neque\*(R", mention of another item \*(L"Porro\*(R", and mention of another
1609 item \*(L"Quisquam Est\*(R", with just the last one requiring the explanatory
1610 paragraph \*(L"Qui dolorem ipsum quia dolor...\*(R"; and then an item
1611 \&\*(L"Ut Enim\*(R".  In that case, you'd want to format it like so:
1612 .Sp
1613 .Vb 1
1614 \&  Neque
1615 .Ve
1616 .Sp
1617 .Vb 1
1618 \&  Porro
1619 .Ve
1620 .Sp
1621 .Vb 4
1622 \&  Quisquam Est
1623 \&    Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci
1624 \&    velit, sed quia non numquam eius modi tempora incidunt ut
1625 \&    labore et dolore magnam aliquam quaerat voluptatem.
1626 .Ve
1627 .Sp
1628 .Vb 1
1629 \&  Ut Enim
1630 .Ve
1631 .Sp
1632 But it could equally well be a discussion of three (related or equivalent)
1633 items, \*(L"Neque\*(R", \*(L"Porro\*(R", and \*(L"Quisquam Est\*(R", followed by a paragraph
1634 explaining them all, and then a new item \*(L"Ut Enim\*(R".  In that case, you'd
1635 probably want to format it like so:
1636 .Sp
1637 .Vb 6
1638 \&  Neque
1639 \&  Porro
1640 \&  Quisquam Est
1641 \&    Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci
1642 \&    velit, sed quia non numquam eius modi tempora incidunt ut
1643 \&    labore et dolore magnam aliquam quaerat voluptatem.
1644 .Ve
1645 .Sp
1646 .Vb 1
1647 \&  Ut Enim
1648 .Ve
1649 .Sp
1650 But (for the foreseeable future), Pod does not provide any way for Pod
1651 authors to distinguish which grouping is meant by the above
1652 \&\*(L"=item\*(R"\-cluster structure.  So formatters should format it like so:
1653 .Sp
1654 .Vb 1
1655 \&  Neque
1656 .Ve
1657 .Sp
1658 .Vb 1
1659 \&  Porro
1660 .Ve
1661 .Sp
1662 .Vb 1
1663 \&  Quisquam Est
1664 .Ve
1665 .Sp
1666 .Vb 3
1667 \&    Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci
1668 \&    velit, sed quia non numquam eius modi tempora incidunt ut
1669 \&    labore et dolore magnam aliquam quaerat voluptatem.
1670 .Ve
1671 .Sp
1672 .Vb 1
1673 \&  Ut Enim
1674 .Ve
1675 .Sp
1676 That is, there should be (at least roughly) equal spacing between
1677 items as between paragraphs (although that spacing may well be less
1678 than the full height of a line of text).  This leaves it to the reader
1679 to use (con)textual cues to figure out whether the \*(L"Qui dolorem
1680 ipsum...\*(R" paragraph applies to the \*(L"Quisquam Est\*(R" item or to all three
1681 items \*(L"Neque\*(R", \*(L"Porro\*(R", and \*(L"Quisquam Est\*(R".  While not an ideal
1682 situation, this is preferable to providing formatting cues that may
1683 be actually contrary to the author's intent.
1684 .ie n .SH "About Data Paragraphs and ""=begin/=end"" Regions"
1685 .el .SH "About Data Paragraphs and ``=begin/=end'' Regions"
1686 .IX Header "About Data Paragraphs and =begin/=end Regions"
1687 Data paragraphs are typically used for inlining non-Pod data that is
1688 to be used (typically passed through) when rendering the document to
1689 a specific format:
1690 .PP
1691 .Vb 1
1692 \&  =begin rtf
1693 .Ve
1694 .PP
1695 .Vb 1
1696 \&  \epar{\epard\eqr\esa4500{\ei Printed\e~\echdate\e~\echtime}\epar}
1697 .Ve
1698 .PP
1699 .Vb 1
1700 \&  =end rtf
1701 .Ve
1702 .PP
1703 The exact same effect could, incidentally, be achieved with a single
1704 \&\*(L"=for\*(R" paragraph:
1705 .PP
1706 .Vb 1
1707 \&  =for rtf \epar{\epard\eqr\esa4500{\ei Printed\e~\echdate\e~\echtime}\epar}
1708 .Ve
1709 .PP
1710 (Although that is not formally a data paragraph, it has the same
1711 meaning as one, and Pod parsers may parse it as one.)
1712 .PP
1713 Another example of a data paragraph:
1714 .PP
1715 .Vb 1
1716 \&  =begin html
1717 .Ve
1718 .PP
1719 .Vb 1
1720 \&  I like <em>PIE</em>!
1721 .Ve
1722 .PP
1723 .Vb 1
1724 \&  <hr>Especially pecan pie!
1725 .Ve
1726 .PP
1727 .Vb 1
1728 \&  =end html
1729 .Ve
1730 .PP
1731 If these were ordinary paragraphs, the Pod parser would try to
1732 expand the \*(L"E</em>\*(R" (in the first paragraph) as a formatting
1733 code, just like \*(L"E<lt>\*(R" or \*(L"E<eacute>\*(R".  But since this
1734 is in a \*(L"=begin \fIidentifier\fR\*(R"...\*(L"=end \fIidentifier\fR\*(R" region \fIand\fR
1735 the identifier \*(L"html\*(R" doesn't have a \*(L":\*(R" prefix, the contents
1736 of this region are stored as data paragraphs, instead of being
1737 processed as ordinary paragraphs (or if they began with spaces
1738 and/or tabs, as verbatim paragraphs).
1739 .PP
1740 As a further example: At time of writing, no \*(L"biblio\*(R" identifier is
1741 supported, but suppose some processor were written to recognize it as
1742 a way of (say) denoting a bibliographic reference (necessarily
1743 containing formatting codes in ordinary paragraphs).  The fact that
1744 \&\*(L"biblio\*(R" paragraphs were meant for ordinary processing would be
1745 indicated by prefacing each \*(L"biblio\*(R" identifier with a colon:
1746 .PP
1747 .Vb 1
1748 \&  =begin :biblio
1749 .Ve
1750 .PP
1751 .Vb 2
1752 \&  Wirth, Niklaus.  1976.  I<Algorithms + Data Structures =
1753 \&  Programs.>  Prentice\-Hall, Englewood Cliffs, NJ.
1754 .Ve
1755 .PP
1756 .Vb 1
1757 \&  =end :biblio
1758 .Ve
1759 .PP
1760 This would signal to the parser that paragraphs in this begin...end
1761 region are subject to normal handling as ordinary/verbatim paragraphs
1762 (while still tagged as meant only for processors that understand the
1763 \&\*(L"biblio\*(R" identifier).  The same effect could be had with:
1764 .PP
1765 .Vb 3
1766 \&  =for :biblio
1767 \&  Wirth, Niklaus.  1976.  I<Algorithms + Data Structures =
1768 \&  Programs.>  Prentice\-Hall, Englewood Cliffs, NJ.
1769 .Ve
1770 .PP
1771 The \*(L":\*(R" on these identifiers means simply \*(L"process this stuff
1772 normally, even though the result will be for some special target\*(R".
1773 I suggest that parser APIs report \*(L"biblio\*(R" as the target identifier,
1774 but also report that it had a \*(L":\*(R" prefix.  (And similarly, with the
1775 above \*(L"html\*(R", report \*(L"html\*(R" as the target identifier, and note the
1776 \&\fIlack\fR of a \*(L":\*(R" prefix.)
1777 .PP
1778 Note that a \*(L"=begin \fIidentifier\fR\*(R"...\*(L"=end \fIidentifier\fR\*(R" region where
1779 \&\fIidentifier\fR begins with a colon, \fIcan\fR contain commands.  For example:
1780 .PP
1781 .Vb 1
1782 \&  =begin :biblio
1783 .Ve
1784 .PP
1785 .Vb 1
1786 \&  Wirth's classic is available in several editions, including:
1787 .Ve
1788 .PP
1789 .Vb 2
1790 \&  =for comment
1791 \&   hm, check abebooks.com for how much used copies cost.
1792 .Ve
1793 .PP
1794 .Vb 1
1795 \&  =over
1796 .Ve
1797 .PP
1798 .Vb 1
1799 \&  =item
1800 .Ve
1801 .PP
1802 .Vb 2
1803 \&  Wirth, Niklaus.  1975.  I<Algorithmen und Datenstrukturen.>
1804 \&  Teubner, Stuttgart.  [Yes, it's in German.]
1805 .Ve
1806 .PP
1807 .Vb 1
1808 \&  =item
1809 .Ve
1810 .PP
1811 .Vb 2
1812 \&  Wirth, Niklaus.  1976.  I<Algorithms + Data Structures =
1813 \&  Programs.>  Prentice\-Hall, Englewood Cliffs, NJ.
1814 .Ve
1815 .PP
1816 .Vb 1
1817 \&  =back
1818 .Ve
1819 .PP
1820 .Vb 1
1821 \&  =end :biblio
1822 .Ve
1823 .PP
1824 Note, however, that a \*(L"=begin \fIidentifier\fR\*(R"...\*(L"=end \fIidentifier\fR\*(R"
1825 region where \fIidentifier\fR does \fInot\fR begin with a colon, should not
1826 directly contain \*(L"=head1\*(R" ... \*(L"=head4\*(R" commands, nor \*(L"=over\*(R", nor \*(L"=back\*(R",
1827 nor \*(L"=item\*(R".  For example, this may be considered invalid:
1828 .PP
1829 .Vb 1
1830 \&  =begin somedata
1831 .Ve
1832 .PP
1833 .Vb 1
1834 \&  This is a data paragraph.
1835 .Ve
1836 .PP
1837 .Vb 1
1838 \&  =head1 Don't do this!
1839 .Ve
1840 .PP
1841 .Vb 1
1842 \&  This is a data paragraph too.
1843 .Ve
1844 .PP
1845 .Vb 1
1846 \&  =end somedata
1847 .Ve
1848 .PP
1849 A Pod processor may signal that the above (specifically the \*(L"=head1\*(R"
1850 paragraph) is an error.  Note, however, that the following should
1851 \&\fInot\fR be treated as an error:
1852 .PP
1853 .Vb 1
1854 \&  =begin somedata
1855 .Ve
1856 .PP
1857 .Vb 1
1858 \&  This is a data paragraph.
1859 .Ve
1860 .PP
1861 .Vb 1
1862 \&  =cut
1863 .Ve
1864 .PP
1865 .Vb 2
1866 \&  # Yup, this isn't Pod anymore.
1867 \&  sub excl { (rand() > .5) ? "hoo!" : "hah!" }
1868 .Ve
1869 .PP
1870 .Vb 1
1871 \&  =pod
1872 .Ve
1873 .PP
1874 .Vb 1
1875 \&  This is a data paragraph too.
1876 .Ve
1877 .PP
1878 .Vb 1
1879 \&  =end somedata
1880 .Ve
1881 .PP
1882 And this too is valid:
1883 .PP
1884 .Vb 1
1885 \&  =begin someformat
1886 .Ve
1887 .PP
1888 .Vb 1
1889 \&  This is a data paragraph.
1890 .Ve
1891 .PP
1892 .Vb 1
1893 \&    And this is a data paragraph.
1894 .Ve
1895 .PP
1896 .Vb 1
1897 \&  =begin someotherformat
1898 .Ve
1899 .PP
1900 .Vb 1
1901 \&  This is a data paragraph too.
1902 .Ve
1903 .PP
1904 .Vb 1
1905 \&    And this is a data paragraph too.
1906 .Ve
1907 .PP
1908 .Vb 1
1909 \&  =begin :yetanotherformat
1910 .Ve
1911 .PP
1912 .Vb 1
1913 \&  =head2 This is a command paragraph!
1914 .Ve
1915 .PP
1916 .Vb 1
1917 \&  This is an ordinary paragraph!
1918 .Ve
1919 .PP
1920 .Vb 1
1921 \&    And this is a verbatim paragraph!
1922 .Ve
1923 .PP
1924 .Vb 1
1925 \&  =end :yetanotherformat
1926 .Ve
1927 .PP
1928 .Vb 1
1929 \&  =end someotherformat
1930 .Ve
1931 .PP
1932 .Vb 1
1933 \&  Another data paragraph!
1934 .Ve
1935 .PP
1936 .Vb 1
1937 \&  =end someformat
1938 .Ve
1939 .PP
1940 The contents of the above \*(L"=begin :yetanotherformat\*(R" ...
1941 \&\*(L"=end :yetanotherformat\*(R" region \fIaren't\fR data paragraphs, because
1942 the immediately containing region's identifier (\*(L":yetanotherformat\*(R")
1943 begins with a colon.  In practice, most regions that contain
1944 data paragraphs will contain \fIonly\fR data paragraphs; however,
1945 the above nesting is syntactically valid as Pod, even if it is
1946 rare.  However, the handlers for some formats, like \*(L"html\*(R",
1947 will accept only data paragraphs, not nested regions; and they may
1948 complain if they see (targeted for them) nested regions, or commands,
1949 other than \*(L"=end\*(R", \*(L"=pod\*(R", and \*(L"=cut\*(R".
1950 .PP
1951 Also consider this valid structure:
1952 .PP
1953 .Vb 1
1954 \&  =begin :biblio
1955 .Ve
1956 .PP
1957 .Vb 1
1958 \&  Wirth's classic is available in several editions, including:
1959 .Ve
1960 .PP
1961 .Vb 1
1962 \&  =over
1963 .Ve
1964 .PP
1965 .Vb 1
1966 \&  =item
1967 .Ve
1968 .PP
1969 .Vb 2
1970 \&  Wirth, Niklaus.  1975.  I<Algorithmen und Datenstrukturen.>
1971 \&  Teubner, Stuttgart.  [Yes, it's in German.]
1972 .Ve
1973 .PP
1974 .Vb 1
1975 \&  =item
1976 .Ve
1977 .PP
1978 .Vb 2
1979 \&  Wirth, Niklaus.  1976.  I<Algorithms + Data Structures =
1980 \&  Programs.>  Prentice\-Hall, Englewood Cliffs, NJ.
1981 .Ve
1982 .PP
1983 .Vb 1
1984 \&  =back
1985 .Ve
1986 .PP
1987 .Vb 1
1988 \&  Buy buy buy!
1989 .Ve
1990 .PP
1991 .Vb 1
1992 \&  =begin html
1993 .Ve
1994 .PP
1995 .Vb 1
1996 \&  <img src='wirth_spokesmodeling_book.png'>
1997 .Ve
1998 .PP
1999 .Vb 1
2000 \&  <hr>
2001 .Ve
2002 .PP
2003 .Vb 1
2004 \&  =end html
2005 .Ve
2006 .PP
2007 .Vb 1
2008 \&  Now now now!
2009 .Ve
2010 .PP
2011 .Vb 1
2012 \&  =end :biblio
2013 .Ve
2014 .PP
2015 There, the \*(L"=begin html\*(R"...\*(L"=end html\*(R" region is nested inside
2016 the larger \*(L"=begin :biblio\*(R"...\*(L"=end :biblio\*(R" region.  Note that the
2017 content of the \*(L"=begin html\*(R"...\*(L"=end html\*(R" region is data
2018 paragraph(s), because the immediately containing region's identifier
2019 (\*(L"html\*(R") \fIdoesn't\fR begin with a colon.
2020 .PP
2021 Pod parsers, when processing a series of data paragraphs one
2022 after another (within a single region), should consider them to
2023 be one large data paragraph that happens to contain blank lines.  So
2024 the content of the above \*(L"=begin html\*(R"...\*(L"=end html\*(R" \fImay\fR be stored
2025 as two data paragraphs (one consisting of
2026 \&\*(L"<img src='wirth_spokesmodeling_book.png'>\en\*(R"
2027 and another consisting of \*(L"<hr>\en\*(R"), but \fIshould\fR be stored as
2028 a single data paragraph (consisting of
2029 \&\*(L"<img src='wirth_spokesmodeling_book.png'>\en\en<hr>\en\*(R").
2030 .PP
2031 Pod processors should tolerate empty
2032 \&\*(L"=begin \fIsomething\fR\*(R"...\*(L"=end \fIsomething\fR\*(R" regions,
2033 empty \*(L"=begin :\fIsomething\fR\*(R"...\*(L"=end :\fIsomething\fR\*(R" regions, and
2034 contentless \*(L"=for \fIsomething\fR\*(R" and \*(L"=for :\fIsomething\fR\*(R"
2035 paragraphs.  I.e., these should be tolerated:
2036 .PP
2037 .Vb 1
2038 \&  =for html
2039 .Ve
2040 .PP
2041 .Vb 1
2042 \&  =begin html
2043 .Ve
2044 .PP
2045 .Vb 1
2046 \&  =end html
2047 .Ve
2048 .PP
2049 .Vb 1
2050 \&  =begin :biblio
2051 .Ve
2052 .PP
2053 .Vb 1
2054 \&  =end :biblio
2055 .Ve
2056 .PP
2057 Incidentally, note that there's no easy way to express a data
2058 paragraph starting with something that looks like a command.  Consider:
2059 .PP
2060 .Vb 1
2061 \&  =begin stuff
2062 .Ve
2063 .PP
2064 .Vb 1
2065 \&  =shazbot
2066 .Ve
2067 .PP
2068 .Vb 1
2069 \&  =end stuff
2070 .Ve
2071 .PP
2072 There, \*(L"=shazbot\*(R" will be parsed as a Pod command \*(L"shazbot\*(R", not as a data
2073 paragraph \*(L"=shazbot\en\*(R".  However, you can express a data paragraph consisting
2074 of \*(L"=shazbot\en\*(R" using this code:
2075 .PP
2076 .Vb 1
2077 \&  =for stuff =shazbot
2078 .Ve
2079 .PP
2080 The situation where this is necessary, is presumably quite rare.
2081 .PP
2082 Note that =end commands must match the currently open =begin command.  That
2083 is, they must properly nest.  For example, this is valid:
2084 .PP
2085 .Vb 1
2086 \&  =begin outer
2087 .Ve
2088 .PP
2089 .Vb 1
2090 \&  X
2091 .Ve
2092 .PP
2093 .Vb 1
2094 \&  =begin inner
2095 .Ve
2096 .PP
2097 .Vb 1
2098 \&  Y
2099 .Ve
2100 .PP
2101 .Vb 1
2102 \&  =end inner
2103 .Ve
2104 .PP
2105 .Vb 1
2106 \&  Z
2107 .Ve
2108 .PP
2109 .Vb 1
2110 \&  =end outer
2111 .Ve
2112 .PP
2113 while this is invalid:
2114 .PP
2115 .Vb 1
2116 \&  =begin outer
2117 .Ve
2118 .PP
2119 .Vb 1
2120 \&  X
2121 .Ve
2122 .PP
2123 .Vb 1
2124 \&  =begin inner
2125 .Ve
2126 .PP
2127 .Vb 1
2128 \&  Y
2129 .Ve
2130 .PP
2131 .Vb 1
2132 \&  =end outer
2133 .Ve
2134 .PP
2135 .Vb 1
2136 \&  Z
2137 .Ve
2138 .PP
2139 .Vb 1
2140 \&  =end inner
2141 .Ve
2142 .PP
2143 This latter is improper because when the \*(L"=end outer\*(R" command is seen, the
2144 currently open region has the formatname \*(L"inner\*(R", not \*(L"outer\*(R".  (It just
2145 happens that \*(L"outer\*(R" is the format name of a higher-up region.)  This is
2146 an error.  Processors must by default report this as an error, and may halt
2147 processing the document containing that error.  A corollary of this is that
2148 regions cannot \*(L"overlap\*(R" \*(-- i.e., the latter block above does not represent
2149 a region called \*(L"outer\*(R" which contains X and Y, overlapping a region called
2150 \&\*(L"inner\*(R" which contains Y and Z.  But because it is invalid (as all
2151 apparently overlapping regions would be), it doesn't represent that, or
2152 anything at all.
2153 .PP
2154 Similarly, this is invalid:
2155 .PP
2156 .Vb 1
2157 \&  =begin thing
2158 .Ve
2159 .PP
2160 .Vb 1
2161 \&  =end hting
2162 .Ve
2163 .PP
2164 This is an error because the region is opened by \*(L"thing\*(R", and the \*(L"=end\*(R"
2165 tries to close \*(L"hting\*(R" [sic].
2166 .PP
2167 This is also invalid:
2168 .PP
2169 .Vb 1
2170 \&  =begin thing
2171 .Ve
2172 .PP
2173 .Vb 1
2174 \&  =end
2175 .Ve
2176 .PP
2177 This is invalid because every \*(L"=end\*(R" command must have a formatname
2178 parameter.
2179 .SH "SEE ALSO"
2180 .IX Header "SEE ALSO"
2181 perlpod, \*(L"PODs: Embedded Documentation\*(R" in perlsyn,
2182 podchecker
2183 .SH "AUTHOR"
2184 .IX Header "AUTHOR"
2185 Sean M. Burke