lib/Pod/Text.pm

   1 # Pod::Text -- Convert POD data to formatted ASCII text.
   2 # $Id: Text.pm,v 2.16 2001/11/28 01:15:50 eagle Exp $
   3 #
   4 # Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>
   5 #
   6 # This program is free software; you may redistribute it and/or modify it
   7 # under the same terms as Perl itself.
   8 #
   9 # This module converts POD to formatted text.  It replaces the old Pod::Text
  10 # module that came with versions of Perl prior to 5.6.0 and attempts to match
  11 # its output except for some specific circumstances where other decisions
  12 # seemed to produce better output.  It uses Pod::Parser and is designed to be
  13 # very easy to subclass.
  14 #
  15 # Perl core hackers, please note that this module is also separately
  16 # maintained outside of the Perl core as part of the podlators.  Please send
  17 # me any patches at the address above in addition to sending them to the
  18 # standard Perl mailing lists.
  19
  20 ##############################################################################
  21 # Modules and declarations
  22 ##############################################################################
  23
  24 package Pod::Text;
  25
  26 require 5.004;
  27
  28 use Carp qw(carp croak);
  29 use Exporter ();
  30 use Pod::ParseLink qw(parselink);
  31 use Pod::Select ();
  32
  33 use strict;
  34 use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
  35
  36 # We inherit from Pod::Select instead of Pod::Parser so that we can be used by
  37 # Pod::Usage.
  38 @ISA = qw(Pod::Select Exporter);
  39
  40 # We have to export pod2text for backward compatibility.
  41 @EXPORT = qw(pod2text);
  42
  43 # Don't use the CVS revision as the version, since this module is also in Perl
  44 # core and too many things could munge CVS magic revision strings.  This
  45 # number should ideally be the same as the CVS revision in podlators, however.
  46 $VERSION = 2.16;
  47
  48
  49 ##############################################################################
  50 # Table of supported E<> escapes
  51 ##############################################################################
  52
# This table is taken near verbatim from Pod::PlainText in Pod::Parser, which
# got it near verbatim from the original Pod::Text.  It is therefore credited
# to Tom Christiansen, and I'm glad I didn't have to write it.  :)  "iexcl" to
# "divide" added by Tim Jenness.
#
# Maps the name inside an E<> escape to the replacement text.  The values
# above \x7F are single-byte character codes.  Two entries are special:
# "shy" expands to nothing, and "nbsp" expands to \x01, the same placeholder
# that S<> handling uses and that output() turns back into a space.
%ESCAPES = (
    'amp'       =>    '&',      # ampersand
    'apos'      =>    "'",      # apostrophe
    'lt'        =>    '<',      # left chevron, less-than
    'gt'        =>    '>',      # right chevron, greater-than
    'quot'      =>    '"',      # double quote
    'sol'       =>    '/',      # solidus (forward slash)
    'verbar'    =>    '|',      # vertical bar

    "Aacute"    =>    "\xC1",   # capital A, acute accent
    "aacute"    =>    "\xE1",   # small a, acute accent
    "Acirc"     =>    "\xC2",   # capital A, circumflex accent
    "acirc"     =>    "\xE2",   # small a, circumflex accent
    "AElig"     =>    "\xC6",   # capital AE diphthong (ligature)
    "aelig"     =>    "\xE6",   # small ae diphthong (ligature)
    "Agrave"    =>    "\xC0",   # capital A, grave accent
    "agrave"    =>    "\xE0",   # small a, grave accent
    "Aring"     =>    "\xC5",   # capital A, ring
    "aring"     =>    "\xE5",   # small a, ring
    "Atilde"    =>    "\xC3",   # capital A, tilde
    "atilde"    =>    "\xE3",   # small a, tilde
    "Auml"      =>    "\xC4",   # capital A, dieresis or umlaut mark
    "auml"      =>    "\xE4",   # small a, dieresis or umlaut mark
    "Ccedil"    =>    "\xC7",   # capital C, cedilla
    "ccedil"    =>    "\xE7",   # small c, cedilla
    "Eacute"    =>    "\xC9",   # capital E, acute accent
    "eacute"    =>    "\xE9",   # small e, acute accent
    "Ecirc"     =>    "\xCA",   # capital E, circumflex accent
    "ecirc"     =>    "\xEA",   # small e, circumflex accent
    "Egrave"    =>    "\xC8",   # capital E, grave accent
    "egrave"    =>    "\xE8",   # small e, grave accent
    "ETH"       =>    "\xD0",   # capital Eth, Icelandic
    "eth"       =>    "\xF0",   # small eth, Icelandic
    "Euml"      =>    "\xCB",   # capital E, dieresis or umlaut mark
    "euml"      =>    "\xEB",   # small e, dieresis or umlaut mark
    "Iacute"    =>    "\xCD",   # capital I, acute accent
    "iacute"    =>    "\xED",   # small i, acute accent
    "Icirc"     =>    "\xCE",   # capital I, circumflex accent
    "icirc"     =>    "\xEE",   # small i, circumflex accent
    "Igrave"    =>    "\xCC",   # capital I, grave accent
    "igrave"    =>    "\xEC",   # small i, grave accent
    "Iuml"      =>    "\xCF",   # capital I, dieresis or umlaut mark
    "iuml"      =>    "\xEF",   # small i, dieresis or umlaut mark
    "Ntilde"    =>    "\xD1",   # capital N, tilde
    "ntilde"    =>    "\xF1",   # small n, tilde
    "Oacute"    =>    "\xD3",   # capital O, acute accent
    "oacute"    =>    "\xF3",   # small o, acute accent
    "Ocirc"     =>    "\xD4",   # capital O, circumflex accent
    "ocirc"     =>    "\xF4",   # small o, circumflex accent
    "Ograve"    =>    "\xD2",   # capital O, grave accent
    "ograve"    =>    "\xF2",   # small o, grave accent
    "Oslash"    =>    "\xD8",   # capital O, slash
    "oslash"    =>    "\xF8",   # small o, slash
    "Otilde"    =>    "\xD5",   # capital O, tilde
    "otilde"    =>    "\xF5",   # small o, tilde
    "Ouml"      =>    "\xD6",   # capital O, dieresis or umlaut mark
    "ouml"      =>    "\xF6",   # small o, dieresis or umlaut mark
    "szlig"     =>    "\xDF",   # small sharp s, German (sz ligature)
    "THORN"     =>    "\xDE",   # capital THORN, Icelandic
    "thorn"     =>    "\xFE",   # small thorn, Icelandic
    "Uacute"    =>    "\xDA",   # capital U, acute accent
    "uacute"    =>    "\xFA",   # small u, acute accent
    "Ucirc"     =>    "\xDB",   # capital U, circumflex accent
    "ucirc"     =>    "\xFB",   # small u, circumflex accent
    "Ugrave"    =>    "\xD9",   # capital U, grave accent
    "ugrave"    =>    "\xF9",   # small u, grave accent
    "Uuml"      =>    "\xDC",   # capital U, dieresis or umlaut mark
    "uuml"      =>    "\xFC",   # small u, dieresis or umlaut mark
    "Yacute"    =>    "\xDD",   # capital Y, acute accent
    "yacute"    =>    "\xFD",   # small y, acute accent
    "yuml"      =>    "\xFF",   # small y, dieresis or umlaut mark

    "laquo"     =>    "\xAB",   # left pointing double angle quotation mark
    "lchevron"  =>    "\xAB",   #  synonym (backwards compatibility)
    "raquo"     =>    "\xBB",   # right pointing double angle quotation mark
    "rchevron"  =>    "\xBB",   #  synonym (backwards compatibility)

    "iexcl"     =>    "\xA1",   # inverted exclamation mark
    "cent"      =>    "\xA2",   # cent sign
    "pound"     =>    "\xA3",   # (UK) pound sign
    "curren"    =>    "\xA4",   # currency sign
    "yen"       =>    "\xA5",   # yen sign
    "brvbar"    =>    "\xA6",   # broken vertical bar
    "sect"      =>    "\xA7",   # section sign
    "uml"       =>    "\xA8",   # dieresis
    "copy"      =>    "\xA9",   # Copyright symbol
    "ordf"      =>    "\xAA",   # feminine ordinal indicator
    "not"       =>    "\xAC",   # not sign
    "shy"       =>    '',       # soft (discretionary) hyphen
    "reg"       =>    "\xAE",   # registered trademark
    "macr"      =>    "\xAF",   # macron, overline
    "deg"       =>    "\xB0",   # degree sign
    "plusmn"    =>    "\xB1",   # plus-minus sign
    "sup2"      =>    "\xB2",   # superscript 2
    "sup3"      =>    "\xB3",   # superscript 3
    "acute"     =>    "\xB4",   # acute accent
    "micro"     =>    "\xB5",   # micro sign
    "para"      =>    "\xB6",   # pilcrow sign = paragraph sign
    "middot"    =>    "\xB7",   # middle dot = Georgian comma
    "cedil"     =>    "\xB8",   # cedilla
    "sup1"      =>    "\xB9",   # superscript 1
    "ordm"      =>    "\xBA",   # masculine ordinal indicator
    "frac14"    =>    "\xBC",   # vulgar fraction one quarter
    "frac12"    =>    "\xBD",   # vulgar fraction one half
    "frac34"    =>    "\xBE",   # vulgar fraction three quarters
    "iquest"    =>    "\xBF",   # inverted question mark
    "times"     =>    "\xD7",   # multiplication sign
    "divide"    =>    "\xF7",   # division sign

    "nbsp"      =>    "\x01",   # non-breaking space
);
 168
 169
 170 ##############################################################################
 171 # Initialization
 172 ##############################################################################
 173
 174 # Initialize the object.  Must be sure to call our parent initializer.
 175 sub initialize {
 176     my $self = shift;
 177
 178     $$self{alt}      = 0  unless defined $$self{alt};
 179     $$self{indent}   = 4  unless defined $$self{indent};
 180     $$self{loose}    = 0  unless defined $$self{loose};
 181     $$self{sentence} = 0  unless defined $$self{sentence};
 182     $$self{width}    = 76 unless defined $$self{width};
 183
 184     # Figure out what quotes we'll be using for C<> text.
 185     $$self{quotes} ||= '"';
 186     if ($$self{quotes} eq 'none') {
 187         $$self{LQUOTE} = $$self{RQUOTE} = '';
 188     } elsif (length ($$self{quotes}) == 1) {
 189         $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes};
 190     } elsif ($$self{quotes} =~ /^(.)(.)$/
 191              || $$self{quotes} =~ /^(..)(..)$/) {
 192         $$self{LQUOTE} = $1;
 193         $$self{RQUOTE} = $2;
 194     } else {
 195         croak qq(Invalid quote specification "$$self{quotes}");
 196     }
 197
 198     $$self{INDENTS}  = [];              # Stack of indentations.
 199     $$self{MARGIN}   = $$self{indent};  # Current left margin in spaces.
 200
 201     $self->SUPER::initialize;
 202
 203     # Tell Pod::Parser that we want the non-POD stuff too if code was set.
 204     $self->parseopts ('-want_nonPODs' => 1) if $$self{code};
 205 }
 206
 207
 208 ##############################################################################
 209 # Core overrides
 210 ##############################################################################
 211
 212 # Called for each command paragraph.  Gets the command, the associated
 213 # paragraph, the line number, and a Pod::Paragraph object.  Just dispatches
 214 # the command to a method named the same as the command.  =cut is handled
 215 # internally by Pod::Parser.
 216 sub command {
 217     my $self = shift;
 218     my $command = shift;
 219     return if $command eq 'pod';
 220     return if ($$self{EXCLUDE} && $command ne 'end');
 221     if ($self->can ('cmd_' . $command)) {
 222         $command = 'cmd_' . $command;
 223         $self->$command (@_);
 224     } else {
 225         my ($text, $line, $paragraph) = @_;
 226         my $file;
 227         ($file, $line) = $paragraph->file_line;
 228         $text =~ s/\n+\z//;
 229         $text = " $text" if ($text =~ /^\S/);
 230         warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
 231         return;
 232     }
 233 }
 234
 235 # Called for a verbatim paragraph.  Gets the paragraph, the line number, and a
 236 # Pod::Paragraph object.  Just output it verbatim, but with tabs converted to
 237 # spaces.
 238 sub verbatim {
 239     my $self = shift;
 240     return if $$self{EXCLUDE};
 241     $self->item if defined $$self{ITEM};
 242     local $_ = shift;
 243     return if /^\s*$/;
 244     s/^(\s*\S+)/(' ' x $$self{MARGIN}) . $1/gme;
 245     $self->output ($_);
 246 }
 247
 248 # Called for a regular text block.  Gets the paragraph, the line number, and a
 249 # Pod::Paragraph object.  Perform interpolation and output the results.
 250 sub textblock {
 251     my $self = shift;
 252     return if $$self{EXCLUDE};
 253     $self->output ($_[0]), return if $$self{VERBATIM};
 254     local $_ = shift;
 255     my $line = shift;
 256
 257     # Interpolate and output the paragraph.
 258     $_ = $self->interpolate ($_, $line);
 259     s/\s+$/\n/;
 260     if (defined $$self{ITEM}) {
 261         $self->item ($_ . "\n");
 262     } else {
 263         $self->output ($self->reformat ($_ . "\n"));
 264     }
 265 }
 266
 267 # Called for a formatting code.  Gets the command, argument, and a
 268 # Pod::InteriorSequence object and is expected to return the resulting text.
 269 # Calls methods for code, bold, italic, file, and link to handle those types
 270 # of codes, and handles S<>, E<>, X<>, and Z<> directly.
 271 sub interior_sequence {
 272     local $_;
 273     my ($self, $command, $seq);
 274     ($self, $command, $_, $seq) = @_;
 275
 276     # We have to defer processing of the inside of an L<> formatting code.  If
 277     # this code is nested inside an L<> code, return the literal raw text of
 278     # it.
 279     my $parent = $seq->nested;
 280     while (defined $parent) {
 281         return $seq->raw_text if ($parent->cmd_name eq 'L');
 282         $parent = $parent->nested;
 283     }
 284
 285     # Index entries are ignored in plain text.
 286     return '' if ($command eq 'X' || $command eq 'Z');
 287
 288     # Expand escapes into the actual character now, warning if invalid.
 289     if ($command eq 'E') {
 290         if (/^\d+$/) {
 291             return chr;
 292         } else {
 293             return $ESCAPES{$_} if defined $ESCAPES{$_};
 294             my $seq = shift;
 295             my ($file, $line) = $seq->file_line;
 296             warn "$file:$line: Unknown escape: E<$_>\n";
 297             return "E<$_>";
 298         }
 299     }
 300
 301     # For all the other formatting codes, empty content produces no output.
 302     return if $_ eq '';
 303
 304     # For S<>, compress all internal whitespace and then map spaces to \01.
 305     # When we output the text, we'll map this back.
 306     if ($command eq 'S') {
 307         s/\s+/ /g;
 308         tr/ /\01/;
 309         return $_;
 310     }
 311
 312     # Anything else needs to get dispatched to another method.
 313     if    ($command eq 'B') { return $self->seq_b ($_) }
 314     elsif ($command eq 'C') { return $self->seq_c ($_) }
 315     elsif ($command eq 'F') { return $self->seq_f ($_) }
 316     elsif ($command eq 'I') { return $self->seq_i ($_) }
 317     elsif ($command eq 'L') { return $self->seq_l ($_, $seq) }
 318     else {
 319         my $seq = shift;
 320         my ($file, $line) = $seq->file_line;
 321         warn "$file:$line: Unknown formatting code $command<$_>\n";
 322     }
 323 }
 324
 325 # Called for each paragraph that's actually part of the POD.  We take
 326 # advantage of this opportunity to untabify the input.  Also, if given the
 327 # code option, we may see paragraphs that aren't part of the POD and need to
 328 # output them directly.
 329 sub preprocess_paragraph {
 330     my $self = shift;
 331     local $_ = shift;
 332     1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
 333     $self->output_code ($_) if $self->cutting;
 334     $_;
 335 }
 336
 337
 338 ##############################################################################
 339 # Command paragraphs
 340 ##############################################################################
 341
 342 # All command paragraphs take the paragraph and the line number.
 343
 344 # First level heading.
 345 sub cmd_head1 {
 346     my ($self, $text, $line) = @_;
 347     $self->heading ($text, $line, 0, '====');
 348 }
 349
 350 # Second level heading.
 351 sub cmd_head2 {
 352     my ($self, $text, $line) = @_;
 353     $self->heading ($text, $line, $$self{indent} / 2, '==  ');
 354 }
 355
 356 # Third level heading.
 357 sub cmd_head3 {
 358     my ($self, $text, $line) = @_;
 359     $self->heading ($text, $line, $$self{indent} * 2 / 3 + 0.5, '=   ');
 360 }
 361
 362 # Third level heading.
 363 sub cmd_head4 {
 364     my ($self, $text, $line) = @_;
 365     $self->heading ($text, $line, $$self{indent} * 3 / 4 + 0.5, '-   ');
 366 }
 367
 368 # Start a list.
 369 sub cmd_over {
 370     my $self = shift;
 371     local $_ = shift;
 372     $self->item ("\n\n") if defined $$self{ITEM};
 373     unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
 374     push (@{ $$self{INDENTS} }, $$self{MARGIN});
 375     $$self{MARGIN} += ($_ + 0);
 376 }
 377
 378 # End a list.
 379 sub cmd_back {
 380     my ($self, $text, $line, $paragraph) = @_;
 381     $self->item ("\n\n") if defined $$self{ITEM};
 382     $$self{MARGIN} = pop @{ $$self{INDENTS} };
 383     unless (defined $$self{MARGIN}) {
 384         my $file;
 385         ($file, $line) = $paragraph->file_line;
 386         warn "$file:$line: Unmatched =back\n";
 387         $$self{MARGIN} = $$self{indent};
 388     }
 389 }
 390
 391 # An individual list item.
 392 sub cmd_item {
 393     my $self = shift;
 394     if (defined $$self{ITEM}) { $self->item }
 395     local $_ = shift;
 396     s/\s+$//;
 397     $$self{ITEM} = $_ ? $self->interpolate ($_) : '*';
 398 }
 399
 400 # Begin a block for a particular translator.  Setting VERBATIM triggers
 401 # special handling in textblock().
 402 sub cmd_begin {
 403     my $self = shift;
 404     local $_ = shift;
 405     my ($kind) = /^(\S+)/ or return;
 406     if ($kind eq 'text') {
 407         $$self{VERBATIM} = 1;
 408     } else {
 409         $$self{EXCLUDE} = 1;
 410     }
 411 }
 412
 413 # End a block for a particular translator.  We assume that all =begin/=end
 414 # pairs are properly closed.
 415 sub cmd_end {
 416     my $self = shift;
 417     $$self{EXCLUDE} = 0;
 418     $$self{VERBATIM} = 0;
 419 }
 420
 421 # One paragraph for a particular translator.  Ignore it unless it's intended
 422 # for text, in which case we treat it as a verbatim text block.
 423 sub cmd_for {
 424     my $self = shift;
 425     local $_ = shift;
 426     my $line = shift;
 427     return unless s/^text\b[ \t]*\n?//;
 428     $self->verbatim ($_, $line);
 429 }
 430
 431
 432 ##############################################################################
 433 # Formatting codes
 434 ##############################################################################
 435
 436 # The simple ones.  These are here mostly so that subclasses can override them
 437 # and do more complicated things.
 438 sub seq_b { return $_[0]{alt} ? "``$_[1]''" : $_[1] }
 439 sub seq_f { return $_[0]{alt} ? "\"$_[1]\"" : $_[1] }
 440 sub seq_i { return '*' . $_[1] . '*' }
 441
 442 # Apply a whole bunch of messy heuristics to not quote things that don't
 443 # benefit from being quoted.  These originally come from Barrie Slaymaker and
 444 # largely duplicate code in Pod::Man.
 445 sub seq_c {
 446     my $self = shift;
 447     local $_ = shift;
 448
 449     # A regex that matches the portion of a variable reference that's the
 450     # array or hash index, separated out just because we want to use it in
 451     # several places in the following regex.
 452     my $index = '(?: \[.*\] | \{.*\} )?';
 453
 454     # Check for things that we don't want to quote, and if we find any of
 455     # them, return the string with just a font change and no quoting.
 456     m{
 457       ^\s*
 458       (?:
 459          ( [\'\`\"] ) .* \1                             # already quoted
 460        | \` .* \'                                       # `quoted'
 461        | \$+ [\#^]? \S $index                           # special ($^Foo, $")
 462        | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
 463        | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
 464        | [+-]? [\d.]+ (?: [eE] [+-]? \d+ )?             # a number
 465        | 0x [a-fA-F\d]+                                 # a hex constant
 466       )
 467       \s*\z
 468      }xo && return $_;
 469
 470     # If we didn't return, go ahead and quote the text.
 471     return $$self{alt} ? "``$_''" : "$$self{LQUOTE}$_$$self{RQUOTE}";
 472 }
 473
 474 # Handle links.  Since this is plain text, we can't actually make any real
 475 # links, so this is all to figure out what text we print out.  Most of the
 476 # work is done by Pod::ParseLink.
 477 sub seq_l {
 478     my ($self, $link, $seq) = @_;
 479     my ($text, $type) = (parselink ($link))[1,4];
 480     my ($file, $line) = $seq->file_line;
 481     $text = $self->interpolate ($text, $line);
 482     $text = '<' . $text . '>' if $type eq 'url';
 483     return $text || '';
 484 }
 485
 486
 487 ##############################################################################
 488 # Header handling
 489 ##############################################################################
 490
 491 # The common code for handling all headers.  Takes the interpolated header
 492 # text, the line number, the indentation, and the surrounding marker for the
 493 # alt formatting method.
 494 sub heading {
 495     my ($self, $text, $line, $indent, $marker) = @_;
 496     $self->item ("\n\n") if defined $$self{ITEM};
 497     $text =~ s/\s+$//;
 498     $text = $self->interpolate ($text, $line);
 499     if ($$self{alt}) {
 500         my $closemark = reverse (split (//, $marker));
 501         $self->output ("\n" . "$marker $text $closemark" . "\n\n");
 502     } else {
 503         $text .= "\n" if $$self{loose};
 504         $self->output (' ' x $indent . $text . "\n");
 505     }
 506 }
 507
 508
 509 ##############################################################################
 510 # List handling
 511 ##############################################################################
 512
 513 # This method is called whenever an =item command is complete (in other words,
 514 # we've seen its associated paragraph or know for certain that it doesn't have
 515 # one).  It gets the paragraph associated with the item as an argument.  If
 516 # that argument is empty, just output the item tag; if it contains a newline,
 517 # output the item tag followed by the newline.  Otherwise, see if there's
 518 # enough room for us to output the item tag in the margin of the text or if we
 519 # have to put it on a separate line.
 520 sub item {
 521     my $self = shift;
 522     local $_ = shift;
 523     my $tag = $$self{ITEM};
 524     unless (defined $tag) {
 525         carp "Item called without tag";
 526         return;
 527     }
 528     undef $$self{ITEM};
 529     my $indent = $$self{INDENTS}[-1];
 530     unless (defined $indent) { $indent = $$self{indent} }
 531     my $space = ' ' x $indent;
 532     $space =~ s/^ /:/ if $$self{alt};
 533     if (!$_ || /^\s+$/ || ($$self{MARGIN} - $indent < length ($tag) + 1)) {
 534         my $margin = $$self{MARGIN};
 535         $$self{MARGIN} = $indent;
 536         my $output = $self->reformat ($tag);
 537         $output =~ s/\n*$/\n/;
 538
 539         # If the text is just whitespace, we have an empty item paragraph;
 540         # this can result from =over/=item/=back without any intermixed
 541         # paragraphs.  Insert some whitespace to keep the =item from merging
 542         # into the next paragraph.
 543         $output .= "\n" if $_ && $_ =~ /^\s*$/;
 544
 545         $self->output ($output);
 546         $$self{MARGIN} = $margin;
 547         $self->output ($self->reformat ($_)) if $_ && /\S/;
 548     } else {
 549         $_ = $self->reformat ($_);
 550         s/^ /:/ if ($$self{alt} && $indent > 0);
 551         my $tagspace = ' ' x length $tag;
 552         s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
 553         $self->output ($_);
 554     }
 555 }
 556
 557
 558 ##############################################################################
 559 # Output formatting
 560 ##############################################################################
 561
 562 # Wrap a line, indenting by the current left margin.  We can't use Text::Wrap
 563 # because it plays games with tabs.  We can't use formline, even though we'd
 564 # really like to, because it screws up non-printing characters.  So we have to
 565 # do the wrapping ourselves.
 566 sub wrap {
 567     my $self = shift;
 568     local $_ = shift;
 569     my $output = '';
 570     my $spaces = ' ' x $$self{MARGIN};
 571     my $width = $$self{width} - $$self{MARGIN};
 572     while (length > $width) {
 573         if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
 574             $output .= $spaces . $1 . "\n";
 575         } else {
 576             last;
 577         }
 578     }
 579     $output .= $spaces . $_;
 580     $output =~ s/\s+$/\n\n/;
 581     $output;
 582 }
 583
 584 # Reformat a paragraph of text for the current margin.  Takes the text to
 585 # reformat and returns the formatted text.
 586 sub reformat {
 587     my $self = shift;
 588     local $_ = shift;
 589
 590     # If we're trying to preserve two spaces after sentences, do some munging
 591     # to support that.  Otherwise, smash all repeated whitespace.
 592     if ($$self{sentence}) {
 593         s/ +$//mg;
 594         s/\.\n/. \n/g;
 595         s/\n/ /g;
 596         s/   +/  /g;
 597     } else {
 598         s/\s+/ /g;
 599     }
 600     $self->wrap ($_);
 601 }
 602
 603 # Output text to the output device.
 604 sub output { $_[1] =~ tr/\01/ /; print { $_[0]->output_handle } $_[1] }
 605
 606 # Output a block of code (something that isn't part of the POD text).  Called
 607 # by preprocess_paragraph only if we were given the code option.  Exists here
 608 # only so that it can be overridden by subclasses.
 609 sub output_code { $_[0]->output ($_[1]) }
 610
 611
 612 ##############################################################################
 613 # Backwards compatibility
 614 ##############################################################################
 615
 616 # The old Pod::Text module did everything in a pod2text() function.  This
 617 # tries to provide the same interface for legacy applications.
 618 sub pod2text {
 619     my @args;
 620
 621     # This is really ugly; I hate doing option parsing in the middle of a
 622     # module.  But the old Pod::Text module supported passing flags to its
 623     # entry function, so handle -a and -<number>.
 624     while ($_[0] =~ /^-/) {
 625         my $flag = shift;
 626         if    ($flag eq '-a')       { push (@args, alt => 1)    }
 627         elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
 628         else {
 629             unshift (@_, $flag);
 630             last;
 631         }
 632     }
 633
 634     # Now that we know what arguments we're using, create the parser.
 635     my $parser = Pod::Text->new (@args);
 636
 637     # If two arguments were given, the second argument is going to be a file
 638     # handle.  That means we want to call parse_from_filehandle(), which means
 639     # we need to turn the first argument into a file handle.  Magic open will
 640     # handle the <&STDIN case automagically.
 641     if (defined $_[1]) {
 642         my @fhs = @_;
 643         local *IN;
 644         unless (open (IN, $fhs[0])) {
 645             croak ("Can't open $fhs[0] for reading: $!\n");
 646             return;
 647         }
 648         $fhs[0] = \*IN;
 649         return $parser->parse_from_filehandle (@fhs);
 650     } else {
 651         return $parser->parse_from_file (@_);
 652     }
 653 }
 654
 655
 656 ##############################################################################
 657 # Module return value and documentation
 658 ##############################################################################
 659
 660 1;
 661 __END__
 662
 663 =head1 NAME
 664
 665 Pod::Text - Convert POD data to formatted ASCII text
 666
 667 =head1 SYNOPSIS
 668
 669     use Pod::Text;
 670     my $parser = Pod::Text->new (sentence => 0, width => 78);
 671
 672     # Read POD from STDIN and write to STDOUT.
 673     $parser->parse_from_filehandle;
 674
 675     # Read POD from file.pod and write to file.txt.
 676     $parser->parse_from_file ('file.pod', 'file.txt');
 677
 678 =head1 DESCRIPTION
 679
 680 Pod::Text is a module that can convert documentation in the POD format (the
 681 preferred language for documenting Perl) into formatted ASCII.  It uses no
 682 special formatting controls or codes whatsoever, and its output is therefore
 683 suitable for nearly any device.
 684
 685 As a derived class from Pod::Parser, Pod::Text supports the same methods and
 686 interfaces.  See L<Pod::Parser> for all the details; briefly, one creates a
 687 new parser with C<< Pod::Text->new() >> and then calls either
 688 parse_from_filehandle() or parse_from_file().
 689
 690 new() can take options, in the form of key/value pairs, that control the
 691 behavior of the parser.  The currently recognized options are:
 692
 693 =over 4
 694
 695 =item alt
 696
 697 If set to a true value, selects an alternate output format that, among other
 698 things, uses a different heading style and marks C<=item> entries with a
 699 colon in the left margin.  Defaults to false.
 700
 701 =item code
 702
 703 If set to a true value, the non-POD parts of the input file will be included
 704 in the output.  Useful for viewing code documented with POD blocks with the
 705 POD rendered and the code left intact.
 706
 707 =item indent
 708
 709 The number of spaces to indent regular text, and the default indentation for
 710 C<=over> blocks.  Defaults to 4.
 711
 712 =item loose
 713
If set to a true value, a blank line is printed after each heading in the
default output format.  If set to false (the default), no blank line is
printed after a heading.  This is the default because it's the expected
formatting for manual pages; if you're formatting arbitrary text documents,
setting this to true may result in more pleasing output.
 720
 721 =item quotes
 722
Sets the quote marks used to surround CE<lt>> text.  If the value is a
single character, it is used as both the left and right quote; if it is two
characters, the first character is used as the left quote and the second as
the right quote; and if it is four characters, the first two are used as
the left quote and the second two as the right quote.
 728
 729 This may also be set to the special value C<none>, in which case no quote
 730 marks are added around CE<lt>> text.
 731
 732 =item sentence
 733
If set to a true value, Pod::Text will assume that each sentence ends in two
spaces, and will try to preserve that spacing.  If set to false, all
consecutive whitespace in non-verbatim paragraphs is compressed into a
single space.  Defaults to false.
 738
 739 =item width
 740
 741 The column at which to wrap text on the right-hand side.  Defaults to 76.
 742
 743 =back
 744
 745 The standard Pod::Parser method parse_from_filehandle() takes up to two
 746 arguments, the first being the file handle to read POD from and the second
 747 being the file handle to write the formatted output to.  The first defaults
 748 to STDIN if not given, and the second defaults to STDOUT.  The method
 749 parse_from_file() is almost identical, except that its two arguments are the
 750 input and output disk files instead.  See L<Pod::Parser> for the specific
 751 details.
 752
 753 =head1 DIAGNOSTICS
 754
 755 =over 4
 756
 757 =item Bizarre space in item
 758
 759 =item Item called without tag
 760
 761 (W) Something has gone wrong in internal C<=item> processing.  These
 762 messages indicate a bug in Pod::Text; you should never see them.
 763
 764 =item Can't open %s for reading: %s
 765
 766 (F) Pod::Text was invoked via the compatibility mode pod2text() interface
 767 and the input file it was given could not be opened.
 768
 769 =item Invalid quote specification "%s"
 770
 771 (F) The quote specification given (the quotes option to the constructor) was
 772 invalid.  A quote specification must be one, two, or four characters long.
 773
=item %s:%d: Unknown command paragraph "%s"

(W) The POD source contained a non-standard command paragraph (something of
the form C<=command args>) that Pod::Text didn't know about.  It was ignored.
 778
 779 =item %s:%d: Unknown escape: %s
 780
 781 (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
 782 know about.
 783
=item %s:%d: Unknown formatting code %s
 785
 786 (W) The POD source contained a non-standard formatting code (something of
 787 the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
 788
 789 =item %s:%d: Unmatched =back
 790
 791 (W) Pod::Text encountered a C<=back> command that didn't correspond to an
 792 C<=over> command.
 793
 794 =back
 795
 796 =head1 RESTRICTIONS
 797
 798 Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
 799 output, due to an internal implementation detail.
 800
 801 =head1 NOTES
 802
 803 This is a replacement for an earlier Pod::Text module written by Tom
 804 Christiansen.  It has a revamped interface, since it now uses Pod::Parser,
 805 but an interface roughly compatible with the old Pod::Text::pod2text()
 806 function is still available.  Please change to the new calling convention,
 807 though.
 808
 809 The original Pod::Text contained code to do formatting via termcap
 810 sequences, although it wasn't turned on by default and it was problematic to
 811 get it to work at all.  This rewrite doesn't even try to do that, but a
 812 subclass of it does.  Look for L<Pod::Text::Termcap>.
 813
 814 =head1 SEE ALSO
 815
 816 L<Pod::Parser>, L<Pod::Text::Termcap>, L<pod2text(1)>
 817
 818 =head1 AUTHOR
 819
 820 Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
 821 Pod::Text by Tom Christiansen <tchrist@mox.perl.com> and its conversion to
 822 Pod::Parser by Brad Appleton <bradapp@enteract.com>.
 823
 824 =head1 COPYRIGHT AND LICENSE
 825
 826 Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>.
 827
 828 This program is free software; you may redistribute it and/or modify it
 829 under the same terms as Perl itself.
 830
 831 =cut