lib/Pod/Text.pm

   1 # Pod::Text -- Convert POD data to formatted ASCII text.
   2 # $Id: Text.pm,v 2.13 2001/10/20 08:07:21 eagle Exp $
   3 #
   4 # Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>
   5 #
   6 # This program is free software; you may redistribute it and/or modify it
   7 # under the same terms as Perl itself.
   8 #
   9 # This module replaces the old Pod::Text that came with versions of Perl prior
  10 # to 5.6.0, and attempts to match its output except for some specific
  11 # circumstances where other decisions seemed to produce better output.  It
  12 # uses Pod::Parser and is designed to be very easy to subclass.
  13 #
  14 # Perl core hackers, please note that this module is also separately
  15 # maintained outside of the Perl core as part of the podlators.  Please send
  16 # me any patches at the address above in addition to sending them to the
  17 # standard Perl mailing lists.
  18
  19 ##############################################################################
  20 # Modules and declarations
  21 ##############################################################################
  22
  23 package Pod::Text;
  24
  25 require 5.004;
  26
  27 use Carp qw(carp croak);
  28 use Exporter ();
  29 use Pod::Select ();
  30
  31 use strict;
  32 use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
  33
  34 # We inherit from Pod::Select instead of Pod::Parser so that we can be used by
  35 # Pod::Usage.
  36 @ISA = qw(Pod::Select Exporter);
  37
  38 # We have to export pod2text for backward compatibility.
  39 @EXPORT = qw(pod2text);
  40
  41 # Don't use the CVS revision as the version, since this module is also in Perl
  42 # core and too many things could munge CVS magic revision strings.  This
  43 # number should ideally be the same as the CVS revision in podlators, however.
  44 $VERSION = 2.13;
  45
  46
  47 ##############################################################################
  48 # Table of supported E<> escapes
  49 ##############################################################################
  50
   51 # This table is taken near verbatim from Pod::PlainText in Pod::Parser, which
   52 # got it near verbatim from the original Pod::Text.  It is therefore credited
   53 # to Tom Christiansen, and I'm glad I didn't have to write it.  :)  "iexcl" to
   54 # "divide" added by Tim Jenness.
      #
      # Keys are the names accepted inside E<>; values are the replacement text.
      # Apart from the six ASCII punctuation escapes at the top, every value is a
      # single character in the range \xA1-\xFF.  interior_sequence() looks names
      # up here and warns about any name that is missing.
   55 %ESCAPES = (
   56     'amp'       =>    '&',      # ampersand
   57     'lt'        =>    '<',      # left chevron, less-than
   58     'gt'        =>    '>',      # right chevron, greater-than
   59     'quot'      =>    '"',      # double quote
   60     'sol'       =>    '/',      # solidus (forward slash)
   61     'verbar'    =>    '|',      # vertical bar
   62
   63     "Aacute"    =>    "\xC1",   # capital A, acute accent
   64     "aacute"    =>    "\xE1",   # small a, acute accent
   65     "Acirc"     =>    "\xC2",   # capital A, circumflex accent
   66     "acirc"     =>    "\xE2",   # small a, circumflex accent
   67     "AElig"     =>    "\xC6",   # capital AE diphthong (ligature)
   68     "aelig"     =>    "\xE6",   # small ae diphthong (ligature)
   69     "Agrave"    =>    "\xC0",   # capital A, grave accent
   70     "agrave"    =>    "\xE0",   # small a, grave accent
   71     "Aring"     =>    "\xC5",   # capital A, ring
   72     "aring"     =>    "\xE5",   # small a, ring
   73     "Atilde"    =>    "\xC3",   # capital A, tilde
   74     "atilde"    =>    "\xE3",   # small a, tilde
   75     "Auml"      =>    "\xC4",   # capital A, dieresis or umlaut mark
   76     "auml"      =>    "\xE4",   # small a, dieresis or umlaut mark
   77     "Ccedil"    =>    "\xC7",   # capital C, cedilla
   78     "ccedil"    =>    "\xE7",   # small c, cedilla
   79     "Eacute"    =>    "\xC9",   # capital E, acute accent
   80     "eacute"    =>    "\xE9",   # small e, acute accent
   81     "Ecirc"     =>    "\xCA",   # capital E, circumflex accent
   82     "ecirc"     =>    "\xEA",   # small e, circumflex accent
   83     "Egrave"    =>    "\xC8",   # capital E, grave accent
   84     "egrave"    =>    "\xE8",   # small e, grave accent
   85     "ETH"       =>    "\xD0",   # capital Eth, Icelandic
   86     "eth"       =>    "\xF0",   # small eth, Icelandic
   87     "Euml"      =>    "\xCB",   # capital E, dieresis or umlaut mark
   88     "euml"      =>    "\xEB",   # small e, dieresis or umlaut mark
   89     "Iacute"    =>    "\xCD",   # capital I, acute accent
   90     "iacute"    =>    "\xED",   # small i, acute accent
   91     "Icirc"     =>    "\xCE",   # capital I, circumflex accent
   92     "icirc"     =>    "\xEE",   # small i, circumflex accent
   93     "Igrave"    =>    "\xCC",   # capital I, grave accent
   94     "igrave"    =>    "\xEC",   # small i, grave accent
   95     "Iuml"      =>    "\xCF",   # capital I, dieresis or umlaut mark
   96     "iuml"      =>    "\xEF",   # small i, dieresis or umlaut mark
   97     "Ntilde"    =>    "\xD1",   # capital N, tilde
   98     "ntilde"    =>    "\xF1",   # small n, tilde
   99     "Oacute"    =>    "\xD3",   # capital O, acute accent
  100     "oacute"    =>    "\xF3",   # small o, acute accent
  101     "Ocirc"     =>    "\xD4",   # capital O, circumflex accent
  102     "ocirc"     =>    "\xF4",   # small o, circumflex accent
  103     "Ograve"    =>    "\xD2",   # capital O, grave accent
  104     "ograve"    =>    "\xF2",   # small o, grave accent
  105     "Oslash"    =>    "\xD8",   # capital O, slash
  106     "oslash"    =>    "\xF8",   # small o, slash
  107     "Otilde"    =>    "\xD5",   # capital O, tilde
  108     "otilde"    =>    "\xF5",   # small o, tilde
  109     "Ouml"      =>    "\xD6",   # capital O, dieresis or umlaut mark
  110     "ouml"      =>    "\xF6",   # small o, dieresis or umlaut mark
  111     "szlig"     =>    "\xDF",   # small sharp s, German (sz ligature)
  112     "THORN"     =>    "\xDE",   # capital THORN, Icelandic
  113     "thorn"     =>    "\xFE",   # small thorn, Icelandic
  114     "Uacute"    =>    "\xDA",   # capital U, acute accent
  115     "uacute"    =>    "\xFA",   # small u, acute accent
  116     "Ucirc"     =>    "\xDB",   # capital U, circumflex accent
  117     "ucirc"     =>    "\xFB",   # small u, circumflex accent
  118     "Ugrave"    =>    "\xD9",   # capital U, grave accent
  119     "ugrave"    =>    "\xF9",   # small u, grave accent
  120     "Uuml"      =>    "\xDC",   # capital U, dieresis or umlaut mark
  121     "uuml"      =>    "\xFC",   # small u, dieresis or umlaut mark
  122     "Yacute"    =>    "\xDD",   # capital Y, acute accent
  123     "yacute"    =>    "\xFD",   # small y, acute accent
  124     "yuml"      =>    "\xFF",   # small y, dieresis or umlaut mark
  125
  126     "laquo"     =>    "\xAB",   # left pointing double angle quotation mark
  127     "lchevron"  =>    "\xAB",   #  synonym (backwards compatibility)
  128     "raquo"     =>    "\xBB",   # right pointing double angle quotation mark
  129     "rchevron"  =>    "\xBB",   #  synonym (backwards compatibility)
  130
  131     "iexcl"     =>    "\xA1",   # inverted exclamation mark
  132     "cent"      =>    "\xA2",   # cent sign
  133     "pound"     =>    "\xA3",   # (UK) pound sign
  134     "curren"    =>    "\xA4",   # currency sign
  135     "yen"       =>    "\xA5",   # yen sign
  136     "brvbar"    =>    "\xA6",   # broken vertical bar
  137     "sect"      =>    "\xA7",   # section sign
  138     "uml"       =>    "\xA8",   # dieresis
  139     "copy"      =>    "\xA9",   # Copyright symbol
  140     "ordf"      =>    "\xAA",   # feminine ordinal indicator
  141     "not"       =>    "\xAC",   # not sign
  142     "shy"       =>    "\xAD",   # soft hyphen
  143     "reg"       =>    "\xAE",   # registered trademark
  144     "macr"      =>    "\xAF",   # macron, overline
  145     "deg"       =>    "\xB0",   # degree sign
  146     "plusmn"    =>    "\xB1",   # plus-minus sign
  147     "sup2"      =>    "\xB2",   # superscript 2
  148     "sup3"      =>    "\xB3",   # superscript 3
  149     "acute"     =>    "\xB4",   # acute accent
  150     "micro"     =>    "\xB5",   # micro sign
  151     "para"      =>    "\xB6",   # pilcrow sign = paragraph sign
  152     "middot"    =>    "\xB7",   # middle dot = Georgian comma
  153     "cedil"     =>    "\xB8",   # cedilla
  154     "sup1"      =>    "\xB9",   # superscript 1
  155     "ordm"      =>    "\xBA",   # masculine ordinal indicator
  156     "frac14"    =>    "\xBC",   # vulgar fraction one quarter
  157     "frac12"    =>    "\xBD",   # vulgar fraction one half
  158     "frac34"    =>    "\xBE",   # vulgar fraction three quarters
  159     "iquest"    =>    "\xBF",   # inverted question mark
  160     "times"     =>    "\xD7",   # multiplication sign
  161     "divide"    =>    "\xF7",   # division sign
  162 );
 163
 164
 165 ##############################################################################
 166 # Initialization
 167 ##############################################################################
 168
 169 # Initialize the object.  Must be sure to call our parent initializer.
 170 sub initialize {
 171     my $self = shift;
 172
 173     $$self{alt}      = 0  unless defined $$self{alt};
 174     $$self{indent}   = 4  unless defined $$self{indent};
 175     $$self{loose}    = 0  unless defined $$self{loose};
 176     $$self{sentence} = 0  unless defined $$self{sentence};
 177     $$self{width}    = 76 unless defined $$self{width};
 178
 179     # Figure out what quotes we'll be using for C<> text.
 180     $$self{quotes} ||= '"';
 181     if ($$self{quotes} eq 'none') {
 182         $$self{LQUOTE} = $$self{RQUOTE} = '';
 183     } elsif (length ($$self{quotes}) == 1) {
 184         $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes};
 185     } elsif ($$self{quotes} =~ /^(.)(.)$/
 186              || $$self{quotes} =~ /^(..)(..)$/) {
 187         $$self{LQUOTE} = $1;
 188         $$self{RQUOTE} = $2;
 189     } else {
 190         croak qq(Invalid quote specification "$$self{quotes}");
 191     }
 192
 193     $$self{INDENTS}  = [];              # Stack of indentations.
 194     $$self{MARGIN}   = $$self{indent};  # Current left margin in spaces.
 195
 196     $self->SUPER::initialize;
 197
 198     # Tell Pod::Parser that we want the non-POD stuff too if code was set.
 199     $self->parseopts ('-want_nonPODs' => 1) if $$self{code};
 200 }
 201
 202
 203 ##############################################################################
 204 # Core overrides
 205 ##############################################################################
 206
 207 # Called for each command paragraph.  Gets the command, the associated
 208 # paragraph, the line number, and a Pod::Paragraph object.  Just dispatches
 209 # the command to a method named the same as the command.  =cut is handled
 210 # internally by Pod::Parser.
 211 sub command {
 212     my $self = shift;
 213     my $command = shift;
 214     return if $command eq 'pod';
 215     return if ($$self{EXCLUDE} && $command ne 'end');
 216     $self->item ("\n") if defined $$self{ITEM};
 217     if ($self->can ('cmd_' . $command)) {
 218         $command = 'cmd_' . $command;
 219         $self->$command (@_);
 220     } else {
 221         my ($text, $line, $paragraph) = @_;
 222         my $file;
 223         ($file, $line) = $paragraph->file_line;
 224         $text =~ s/\n+\z//;
 225         $text = " $text" if ($text =~ /^\S/);
 226         warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
 227         return;
 228     }
 229 }
 230
 231 # Called for a verbatim paragraph.  Gets the paragraph, the line number, and a
 232 # Pod::Paragraph object.  Just output it verbatim, but with tabs converted to
 233 # spaces.
 234 sub verbatim {
 235     my $self = shift;
 236     return if $$self{EXCLUDE};
 237     $self->item if defined $$self{ITEM};
 238     local $_ = shift;
 239     return if /^\s*$/;
 240     s/^(\s*\S+)/(' ' x $$self{MARGIN}) . $1/gme;
 241     $self->output ($_);
 242 }
 243
 244 # Called for a regular text block.  Gets the paragraph, the line number, and a
 245 # Pod::Paragraph object.  Perform interpolation and output the results.
 246 sub textblock {
 247     my $self = shift;
 248     return if $$self{EXCLUDE};
 249     $self->output ($_[0]), return if $$self{VERBATIM};
 250     local $_ = shift;
 251     my $line = shift;
 252
 253     # Perform a little magic to collapse multiple L<> references.  This is
 254     # here mostly for backwards-compatibility.  We'll just rewrite the whole
 255     # thing into actual text at this part, bypassing the whole internal
 256     # sequence parsing thing.
 257     s{
 258         (
 259           L<                    # A link of the form L</something>.
 260               /
 261               (
 262                   [:\w]+        # The item has to be a simple word...
 263                   (\(\))?       # ...or simple function.
 264               )
 265           >
 266           (
 267               ,?\s+(and\s+)?    # Allow lots of them, conjuncted.
 268               L<
 269                   /
 270                   (
 271                       [:\w]+
 272                       (\(\))?
 273                   )
 274               >
 275           )+
 276         )
 277     } {
 278         local $_ = $1;
 279         s%L</([^>]+)>%$1%g;
 280         my @items = split /(?:,?\s+(?:and\s+)?)/;
 281         my $string = "the ";
 282         my $i;
 283         for ($i = 0; $i < @items; $i++) {
 284             $string .= $items[$i];
 285             $string .= ", " if @items > 2 && $i != $#items;
 286             $string .= " and " if ($i == $#items - 1);
 287         }
 288         $string .= " entries elsewhere in this document";
 289         $string;
 290     }gex;
 291
 292     # Now actually interpolate and output the paragraph.
 293     $_ = $self->interpolate ($_, $line);
 294     s/\s+$/\n/;
 295     if (defined $$self{ITEM}) {
 296         $self->item ($_ . "\n");
 297     } else {
 298         $self->output ($self->reformat ($_ . "\n"));
 299     }
 300 }
 301
 302 # Called for an interior sequence.  Gets the command, argument, and a
 303 # Pod::InteriorSequence object and is expected to return the resulting text.
 304 # Calls code, bold, italic, file, and link to handle those types of sequences,
 305 # and handles S<>, E<>, X<>, and Z<> directly.
 306 sub interior_sequence {
 307     my $self = shift;
 308     my $command = shift;
 309     local $_ = shift;
 310     return '' if ($command eq 'X' || $command eq 'Z');
 311
 312     # Expand escapes into the actual character now, warning if invalid.
 313     if ($command eq 'E') {
 314         if (/^\d+$/) {
 315             return chr;
 316         } else {
 317             return $ESCAPES{$_} if defined $ESCAPES{$_};
 318             my $seq = shift;
 319             my ($file, $line) = $seq->file_line;
 320             warn "$file:$line: Unknown escape: E<$_>\n";
 321             return "E<$_>";
 322         }
 323     }
 324
 325     # For all the other sequences, empty content produces no output.
 326     return if $_ eq '';
 327
 328     # For S<>, compress all internal whitespace and then map spaces to \01.
 329     # When we output the text, we'll map this back.
 330     if ($command eq 'S') {
 331         s/\s{2,}/ /g;
 332         tr/ /\01/;
 333         return $_;
 334     }
 335
 336     # Anything else needs to get dispatched to another method.
 337     if    ($command eq 'B') { return $self->seq_b ($_) }
 338     elsif ($command eq 'C') { return $self->seq_c ($_) }
 339     elsif ($command eq 'F') { return $self->seq_f ($_) }
 340     elsif ($command eq 'I') { return $self->seq_i ($_) }
 341     elsif ($command eq 'L') { return $self->seq_l ($_) }
 342     else {
 343         my $seq = shift;
 344         my ($file, $line) = $seq->file_line;
 345         warn "$file:$line: Unknown sequence $command<$_>\n";
 346     }
 347 }
 348
 349 # Called for each paragraph that's actually part of the POD.  We take
 350 # advantage of this opportunity to untabify the input.  Also, if given the
 351 # code option, we may see paragraphs that aren't part of the POD and need to
 352 # output them directly.
 353 sub preprocess_paragraph {
 354     my $self = shift;
 355     local $_ = shift;
 356     1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
 357     $self->output_code ($_) if $self->cutting;
 358     $_;
 359 }
 360
 361
 362 ##############################################################################
 363 # Command paragraphs
 364 ##############################################################################
 365
 366 # All command paragraphs take the paragraph and the line number.
 367
 368 # First level heading.
 369 sub cmd_head1 {
 370     my $self = shift;
 371     local $_ = shift;
 372     s/\s+$//;
 373     $_ = $self->interpolate ($_, shift);
 374     if ($$self{alt}) {
 375         $self->output ("\n==== $_ ====\n\n");
 376     } else {
 377         $_ .= "\n" if $$self{loose};
 378         $self->output ($_ . "\n");
 379     }
 380 }
 381
 382 # Second level heading.
 383 sub cmd_head2 {
 384     my $self = shift;
 385     local $_ = shift;
 386     s/\s+$//;
 387     $_ = $self->interpolate ($_, shift);
 388     if ($$self{alt}) {
 389         $self->output ("\n==   $_   ==\n\n");
 390     } else {
 391         $self->output (' ' x ($$self{indent} / 2) . $_ . "\n\n");
 392     }
 393 }
 394
 395 # Third level heading.
 396 sub cmd_head3 {
 397     my $self = shift;
 398     local $_ = shift;
 399     s/\s+$//;
 400     $_ = $self->interpolate ($_, shift);
 401     if ($$self{alt}) {
 402         $self->output ("\n=    $_    =\n\n");
 403     } else {
 404         $self->output (' ' x ($$self{indent} * 2 / 3 + 0.5) . $_ . "\n\n");
 405     }
 406 }
 407
 408 # Third level heading.
 409 sub cmd_head4 {
 410     my $self = shift;
 411     local $_ = shift;
 412     s/\s+$//;
 413     $_ = $self->interpolate ($_, shift);
 414     if ($$self{alt}) {
 415         $self->output ("\n-    $_    -\n\n");
 416     } else {
 417         $self->output (' ' x ($$self{indent} * 3 / 4 + 0.5) . $_ . "\n\n");
 418     }
 419 }
 420
 421 # Start a list.
 422 sub cmd_over {
 423     my $self = shift;
 424     local $_ = shift;
 425     unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
 426     push (@{ $$self{INDENTS} }, $$self{MARGIN});
 427     $$self{MARGIN} += ($_ + 0);
 428 }
 429
 430 # End a list.
 431 sub cmd_back {
 432     my ($self, $text, $line, $paragraph) = @_;
 433     $$self{MARGIN} = pop @{ $$self{INDENTS} };
 434     unless (defined $$self{MARGIN}) {
 435         my $file;
 436         ($file, $line) = $paragraph->file_line;
 437         warn "$file:$line: Unmatched =back\n";
 438         $$self{MARGIN} = $$self{indent};
 439     }
 440 }
 441
 442 # An individual list item.
 443 sub cmd_item {
 444     my $self = shift;
 445     if (defined $$self{ITEM}) { $self->item }
 446     local $_ = shift;
 447     s/\s+$//;
 448     $$self{ITEM} = $self->interpolate ($_);
 449 }
 450
 451 # Begin a block for a particular translator.  Setting VERBATIM triggers
 452 # special handling in textblock().
 453 sub cmd_begin {
 454     my $self = shift;
 455     local $_ = shift;
 456     my ($kind) = /^(\S+)/ or return;
 457     if ($kind eq 'text') {
 458         $$self{VERBATIM} = 1;
 459     } else {
 460         $$self{EXCLUDE} = 1;
 461     }
 462 }
 463
 464 # End a block for a particular translator.  We assume that all =begin/=end
 465 # pairs are properly closed.
 466 sub cmd_end {
 467     my $self = shift;
 468     $$self{EXCLUDE} = 0;
 469     $$self{VERBATIM} = 0;
 470 }
 471
 472 # One paragraph for a particular translator.  Ignore it unless it's intended
 473 # for text, in which case we treat it as a verbatim text block.
 474 sub cmd_for {
 475     my $self = shift;
 476     local $_ = shift;
 477     my $line = shift;
 478     return unless s/^text\b[ \t]*\n?//;
 479     $self->verbatim ($_, $line);
 480 }
 481
 482
 483 ##############################################################################
 484 # Interior sequences
 485 ##############################################################################
 486
 487 # The simple formatting ones.  These are here mostly so that subclasses can
 488 # override them and do more complicated things.
 489 sub seq_b { return $_[0]{alt} ? "``$_[1]''" : $_[1] }
 490 sub seq_f { return $_[0]{alt} ? "\"$_[1]\"" : $_[1] }
 491 sub seq_i { return '*' . $_[1] . '*' }
 492
 493 # Apply a whole bunch of messy heuristics to not quote things that don't
 494 # benefit from being quoted.  These originally come from Barrie Slaymaker and
 495 # largely duplicate code in Pod::Man.
 496 sub seq_c {
 497     my $self = shift;
 498     local $_ = shift;
 499
 500     # A regex that matches the portion of a variable reference that's the
 501     # array or hash index, separated out just because we want to use it in
 502     # several places in the following regex.
 503     my $index = '(?: \[.*\] | \{.*\} )?';
 504
 505     # Check for things that we don't want to quote, and if we find any of
 506     # them, return the string with just a font change and no quoting.
 507     m{
 508       ^\s*
 509       (?:
 510          ( [\'\`\"] ) .* \1                             # already quoted
 511        | \` .* \'                                       # `quoted'
 512        | \$+ [\#^]? \S $index                           # special ($^Foo, $")
 513        | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
 514        | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
 515        | [+-]? [\d.]+ (?: [eE] [+-]? \d+ )?             # a number
 516        | 0x [a-fA-F\d]+                                 # a hex constant
 517       )
 518       \s*\z
 519      }xo && return $_;
 520
 521     # If we didn't return, go ahead and quote the text.
 522     return $$self{alt} ? "``$_''" : "$$self{LQUOTE}$_$$self{RQUOTE}";
 523 }
 524
 525 # The complicated one.  Handle links.  Since this is plain text, we can't
 526 # actually make any real links, so this is all to figure out what text we
 527 # print out.
 528 sub seq_l {
 529     my $self = shift;
 530     local $_ = shift;
 531
 532     # Smash whitespace in case we were split across multiple lines.
 533     s/\s+/ /g;
 534
 535     # If we were given any explicit text, just output it.
 536     if (/^([^|]+)\|/) { return $1 }
 537
 538     # Okay, leading and trailing whitespace isn't important; get rid of it.
 539     s/^\s+//;
 540     s/\s+$//;
 541
 542     # If the argument looks like a URL, return it verbatim.  This only handles
 543     # URLs that use the server syntax.
 544     if (m%^[a-z]+://\S+$%) { return $_ }
 545
 546     # Default to using the whole content of the link entry as a section name.
 547     # Note that L<manpage/> forces a manpage interpretation, as does something
 548     # looking like L<manpage(section)>.  The latter is an enhancement over the
 549     # original Pod::Text.
 550     my ($manpage, $section) = ('', $_);
 551     if (/^"\s*(.*?)\s*"$/) {
 552         $section = '"' . $1 . '"';
 553     } elsif (m/^[-:.\w]+(?:\(\S+\))?$/) {
 554         ($manpage, $section) = ($_, '');
 555     } elsif (m%/%) {
 556         ($manpage, $section) = split (/\s*\/\s*/, $_, 2);
 557     }
 558
 559     # Now build the actual output text.
 560     my $text = '';
 561     if (!length $section) {
 562         $text = "the $manpage manpage" if length $manpage;
 563     } elsif ($section =~ /^[:\w]+(?:\(\))?/) {
 564         $text .= 'the ' . $section . ' entry';
 565         $text .= (length $manpage) ? " in the $manpage manpage"
 566                                    : " elsewhere in this document";
 567     } else {
 568         $section =~ s/^\"\s*//;
 569         $section =~ s/\s*\"$//;
 570         $text .= 'the section on "' . $section . '"';
 571         $text .= " in the $manpage manpage" if length $manpage;
 572     }
 573     $text;
 574 }
 575
 576
 577 ##############################################################################
 578 # List handling
 579 ##############################################################################
 580
 581 # This method is called whenever an =item command is complete (in other words,
 582 # we've seen its associated paragraph or know for certain that it doesn't have
 583 # one).  It gets the paragraph associated with the item as an argument.  If
 584 # that argument is empty, just output the item tag; if it contains a newline,
 585 # output the item tag followed by the newline.  Otherwise, see if there's
 586 # enough room for us to output the item tag in the margin of the text or if we
 587 # have to put it on a separate line.
 588 sub item {
 589     my $self = shift;
 590     local $_ = shift;
 591     my $tag = $$self{ITEM};
 592     unless (defined $tag) {
 593         carp "Item called without tag";
 594         return;
 595     }
 596     undef $$self{ITEM};
 597     my $indent = $$self{INDENTS}[-1];
 598     unless (defined $indent) { $indent = $$self{indent} }
 599     my $space = ' ' x $indent;
 600     $space =~ s/^ /:/ if $$self{alt};
 601     if (!$_ || /^\s+$/ || ($$self{MARGIN} - $indent < length ($tag) + 1)) {
 602         my $margin = $$self{MARGIN};
 603         $$self{MARGIN} = $indent;
 604         my $output = $self->reformat ($tag);
 605         $output =~ s/\n*$/\n/;
 606         $self->output ($output);
 607         $$self{MARGIN} = $margin;
 608         $self->output ($self->reformat ($_)) if /\S/;
 609     } else {
 610         $_ = $self->reformat ($_);
 611         s/^ /:/ if ($$self{alt} && $indent > 0);
 612         my $tagspace = ' ' x length $tag;
 613         s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
 614         $self->output ($_);
 615     }
 616 }
 617
 618
 619 ##############################################################################
 620 # Output formatting
 621 ##############################################################################
 622
 623 # Wrap a line, indenting by the current left margin.  We can't use Text::Wrap
 624 # because it plays games with tabs.  We can't use formline, even though we'd
 625 # really like to, because it screws up non-printing characters.  So we have to
 626 # do the wrapping ourselves.
 627 sub wrap {
 628     my $self = shift;
 629     local $_ = shift;
 630     my $output = '';
 631     my $spaces = ' ' x $$self{MARGIN};
 632     my $width = $$self{width} - $$self{MARGIN};
 633     while (length > $width) {
 634         if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
 635             $output .= $spaces . $1 . "\n";
 636         } else {
 637             last;
 638         }
 639     }
 640     $output .= $spaces . $_;
 641     $output =~ s/\s+$/\n\n/;
 642     $output;
 643 }
 644
 645 # Reformat a paragraph of text for the current margin.  Takes the text to
 646 # reformat and returns the formatted text.
 647 sub reformat {
 648     my $self = shift;
 649     local $_ = shift;
 650
 651     # If we're trying to preserve two spaces after sentences, do some munging
 652     # to support that.  Otherwise, smash all repeated whitespace.
 653     if ($$self{sentence}) {
 654         s/ +$//mg;
 655         s/\.\n/. \n/g;
 656         s/\n/ /g;
 657         s/   +/  /g;
 658     } else {
 659         s/\s+/ /g;
 660     }
 661     $self->wrap ($_);
 662 }
 663
 664 # Output text to the output device.
 665 sub output { $_[1] =~ tr/\01/ /; print { $_[0]->output_handle } $_[1] }
 666
 667 # Output a block of code (something that isn't part of the POD text).  Called
 668 # by preprocess_paragraph only if we were given the code option.  Exists here
 669 # only so that it can be overridden by subclasses.
 670 sub output_code { $_[0]->output ($_[1]) }
 671
 672
 673 ##############################################################################
 674 # Backwards compatibility
 675 ##############################################################################
 676
 677 # The old Pod::Text module did everything in a pod2text() function.  This
 678 # tries to provide the same interface for legacy applications.
 679 sub pod2text {
 680     my @args;
 681
 682     # This is really ugly; I hate doing option parsing in the middle of a
 683     # module.  But the old Pod::Text module supported passing flags to its
 684     # entry function, so handle -a and -<number>.
 685     while ($_[0] =~ /^-/) {
 686         my $flag = shift;
 687         if    ($flag eq '-a')       { push (@args, alt => 1)    }
 688         elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
 689         else {
 690             unshift (@_, $flag);
 691             last;
 692         }
 693     }
 694
 695     # Now that we know what arguments we're using, create the parser.
 696     my $parser = Pod::Text->new (@args);
 697
 698     # If two arguments were given, the second argument is going to be a file
 699     # handle.  That means we want to call parse_from_filehandle(), which means
 700     # we need to turn the first argument into a file handle.  Magic open will
 701     # handle the <&STDIN case automagically.
 702     if (defined $_[1]) {
 703         my @fhs = @_;
 704         local *IN;
 705         unless (open (IN, $fhs[0])) {
 706             croak ("Can't open $fhs[0] for reading: $!\n");
 707             return;
 708         }
 709         $fhs[0] = \*IN;
 710         return $parser->parse_from_filehandle (@fhs);
 711     } else {
 712         return $parser->parse_from_file (@_);
 713     }
 714 }
 715
 716
 717 ##############################################################################
 718 # Module return value and documentation
 719 ##############################################################################
 720
 721 1;
 722 __END__
 723
 724 =head1 NAME
 725
 726 Pod::Text - Convert POD data to formatted ASCII text
 727
 728 =head1 SYNOPSIS
 729
 730     use Pod::Text;
 731     my $parser = Pod::Text->new (sentence => 0, width => 78);
 732
 733     # Read POD from STDIN and write to STDOUT.
 734     $parser->parse_from_filehandle;
 735
 736     # Read POD from file.pod and write to file.txt.
 737     $parser->parse_from_file ('file.pod', 'file.txt');
 738
 739 =head1 DESCRIPTION
 740
 741 Pod::Text is a module that can convert documentation in the POD format (the
 742 preferred language for documenting Perl) into formatted ASCII.  It uses no
 743 special formatting controls or codes whatsoever, and its output is therefore
 744 suitable for nearly any device.
 745
 746 As a derived class from Pod::Parser, Pod::Text supports the same methods and
 747 interfaces.  See L<Pod::Parser> for all the details; briefly, one creates a
 748 new parser with C<Pod::Text-E<gt>new()> and then calls either
 749 parse_from_filehandle() or parse_from_file().
 750
 751 new() can take options, in the form of key/value pairs, that control the
 752 behavior of the parser.  The currently recognized options are:
 753
 754 =over 4
 755
 756 =item alt
 757
 758 If set to a true value, selects an alternate output format that, among other
 759 things, uses a different heading style and marks C<=item> entries with a
 760 colon in the left margin.  Defaults to false.
 761
 762 =item code
 763
 764 If set to a true value, the non-POD parts of the input file will be included
 765 in the output.  Useful for viewing code documented with POD blocks with the
 766 POD rendered and the code left intact.
 767
 768 =item indent
 769
 770 The number of spaces to indent regular text, and the default indentation for
 771 C<=over> blocks.  Defaults to 4.
 772
 773 =item loose
 774
 775 If set to a true value, a blank line is printed after a C<=head1> heading.
 776 If set to false (the default), no blank line is printed after C<=head1>,
 777 although one is still printed after C<=head2>.  This is the default because
 778 it's the expected formatting for manual pages; if you're formatting
 779 arbitrary text documents, setting this to true may result in more pleasing
 780 output.
 781
 782 =item quotes
 783
 784 Sets the quote marks used to surround CE<lt>> text.  If the value is a
 785 single character, it is used as both the left and right quote; if it is two
 786 characters, the first character is used as the left quote and the second as
 787 the right quote; and if it is four characters, the first two are used as
 788 the left quote and the second two as the right quote.
 789
 790 This may also be set to the special value C<none>, in which case no quote
 791 marks are added around CE<lt>> text.
 792
 793 =item sentence
 794
 795 If set to a true value, Pod::Text will assume that each sentence ends in two
 796 spaces, and will try to preserve that spacing.  If set to false, all
 797 consecutive whitespace in non-verbatim paragraphs is compressed into a
 798 single space.  Defaults to true.
 799
 800 =item width
 801
 802 The column at which to wrap text on the right-hand side.  Defaults to 76.
 803
 804 =back
 805
 806 The standard Pod::Parser method parse_from_filehandle() takes up to two
 807 arguments, the first being the file handle to read POD from and the second
 808 being the file handle to write the formatted output to.  The first defaults
 809 to STDIN if not given, and the second defaults to STDOUT.  The method
 810 parse_from_file() is almost identical, except that its two arguments are the
 811 input and output disk files instead.  See L<Pod::Parser> for the specific
 812 details.
 813
 814 =head1 DIAGNOSTICS
 815
 816 =over 4
 817
 818 =item Bizarre space in item
 819
 820 =item Item called without tag
 821
 822 (W) Something has gone wrong in internal C<=item> processing.  These
 823 messages indicate a bug in Pod::Text; you should never see them.
 824
 825 =item Can't open %s for reading: %s
 826
 827 (F) Pod::Text was invoked via the compatibility mode pod2text() interface
 828 and the input file it was given could not be opened.
 829
 830 =item Invalid quote specification "%s"
 831
 832 (F) The quote specification given (the quotes option to the constructor) was
 833 invalid.  A quote specification must be one, two, or four characters long.
 834
 835 =item %s:%d: Unknown command paragraph "%s".
 836
 837 (W) The POD source contained a non-standard command paragraph (something of
 838 the form C<=command args>) that Pod::Text didn't know about.  It was ignored.
 839
 840 =item %s:%d: Unknown escape: %s
 841
 842 (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
 843 know about.
 844
 845 =item %s:%d: Unknown sequence: %s
 846
 847 (W) The POD source contained a non-standard internal sequence (something of
 848 the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
 849
 850 =item %s:%d: Unmatched =back
 851
 852 (W) Pod::Text encountered a C<=back> command that didn't correspond to an
 853 C<=over> command.
 854
 855 =back
 856
 857 =head1 RESTRICTIONS
 858
 859 Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
 860 output, due to an internal implementation detail.
 861
 862 =head1 NOTES
 863
 864 This is a replacement for an earlier Pod::Text module written by Tom
 865 Christiansen.  It has a revamped interface, since it now uses Pod::Parser,
 866 but an interface roughly compatible with the old Pod::Text::pod2text()
 867 function is still available.  Please change to the new calling convention,
 868 though.
 869
 870 The original Pod::Text contained code to do formatting via termcap
 871 sequences, although it wasn't turned on by default and it was problematic to
 872 get it to work at all.  This rewrite doesn't even try to do that, but a
 873 subclass of it does.  Look for L<Pod::Text::Termcap|Pod::Text::Termcap>.
 874
 875 =head1 SEE ALSO
 876
 877 L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>,
 878 pod2text(1)
 879
 880 =head1 AUTHOR
 881
 882 Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
 883 original Pod::Text by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> and
 884 its conversion to Pod::Parser by Brad Appleton
 885 E<lt>bradapp@enteract.comE<gt>.
 886
 887 =head1 COPYRIGHT AND LICENSE
 888
 889 Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>.
 890
 891 This program is free software; you may redistribute it and/or modify it
 892 under the same terms as Perl itself.
 893
 894 =cut