lib/Pod/Text.pm

   1 # Pod::Text -- Convert POD data to formatted ASCII text.
   2 # $Id: Text.pm,v 2.15 2001/11/23 06:14:10 eagle Exp $
   3 #
   4 # Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>
   5 #
   6 # This program is free software; you may redistribute it and/or modify it
   7 # under the same terms as Perl itself.
   8 #
   9 # This module replaces the old Pod::Text that came with versions of Perl prior
  10 # to 5.6.0, and attempts to match its output except for some specific
  11 # circumstances where other decisions seemed to produce better output.  It
  12 # uses Pod::Parser and is designed to be very easy to subclass.
  13 #
  14 # Perl core hackers, please note that this module is also separately
  15 # maintained outside of the Perl core as part of the podlators.  Please send
  16 # me any patches at the address above in addition to sending them to the
  17 # standard Perl mailing lists.
  18
  19 ##############################################################################
  20 # Modules and declarations
  21 ##############################################################################
  22
  23 package Pod::Text;
  24
  25 require 5.004;
  26
  27 use Carp qw(carp croak);
  28 use Exporter ();
  29 use Pod::ParseLink qw(parselink);
  30 use Pod::Select ();
  31
  32 use strict;
  33 use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
  34
  35 # We inherit from Pod::Select instead of Pod::Parser so that we can be used by
  36 # Pod::Usage.
  37 @ISA = qw(Pod::Select Exporter);
  38
  39 # We have to export pod2text for backward compatibility.
  40 @EXPORT = qw(pod2text);
  41
  42 # Don't use the CVS revision as the version, since this module is also in Perl
  43 # core and too many things could munge CVS magic revision strings.  This
  44 # number should ideally be the same as the CVS revision in podlators, however.
  45 $VERSION = 2.15;
  46
  47
  48 ##############################################################################
  49 # Table of supported E<> escapes
  50 ##############################################################################
  51
  52 # This table is taken near verbatim from Pod::PlainText in Pod::Parser, which
  53 # got it near verbatim from the original Pod::Text.  It is therefore credited
  54 # to Tom Christiansen, and I'm glad I didn't have to write it.  :)  "iexcl" to
  55 # "divide" added by Tim Jenness.
  56 %ESCAPES = (
  57     'amp'       =>    '&',      # ampersand
  58     'apos'      =>    "'",      # apostrophe
  59     'lt'        =>    '<',      # left chevron, less-than
  60     'gt'        =>    '>',      # right chevron, greater-than
  61     'quot'      =>    '"',      # double quote
  62     'sol'       =>    '/',      # solidus (forward slash)
  63     'verbar'    =>    '|',      # vertical bar
  64
  65     "Aacute"    =>    "\xC1",   # capital A, acute accent
  66     "aacute"    =>    "\xE1",   # small a, acute accent
  67     "Acirc"     =>    "\xC2",   # capital A, circumflex accent
  68     "acirc"     =>    "\xE2",   # small a, circumflex accent
  69     "AElig"     =>    "\xC6",   # capital AE diphthong (ligature)
  70     "aelig"     =>    "\xE6",   # small ae diphthong (ligature)
  71     "Agrave"    =>    "\xC0",   # capital A, grave accent
  72     "agrave"    =>    "\xE0",   # small a, grave accent
  73     "Aring"     =>    "\xC5",   # capital A, ring
  74     "aring"     =>    "\xE5",   # small a, ring
  75     "Atilde"    =>    "\xC3",   # capital A, tilde
  76     "atilde"    =>    "\xE3",   # small a, tilde
  77     "Auml"      =>    "\xC4",   # capital A, dieresis or umlaut mark
  78     "auml"      =>    "\xE4",   # small a, dieresis or umlaut mark
  79     "Ccedil"    =>    "\xC7",   # capital C, cedilla
  80     "ccedil"    =>    "\xE7",   # small c, cedilla
  81     "Eacute"    =>    "\xC9",   # capital E, acute accent
  82     "eacute"    =>    "\xE9",   # small e, acute accent
  83     "Ecirc"     =>    "\xCA",   # capital E, circumflex accent
  84     "ecirc"     =>    "\xEA",   # small e, circumflex accent
  85     "Egrave"    =>    "\xC8",   # capital E, grave accent
  86     "egrave"    =>    "\xE8",   # small e, grave accent
  87     "ETH"       =>    "\xD0",   # capital Eth, Icelandic
  88     "eth"       =>    "\xF0",   # small eth, Icelandic
  89     "Euml"      =>    "\xCB",   # capital E, dieresis or umlaut mark
  90     "euml"      =>    "\xEB",   # small e, dieresis or umlaut mark
  91     "Iacute"    =>    "\xCD",   # capital I, acute accent
  92     "iacute"    =>    "\xED",   # small i, acute accent
  93     "Icirc"     =>    "\xCE",   # capital I, circumflex accent
  94     "icirc"     =>    "\xEE",   # small i, circumflex accent
  95     "Igrave"    =>    "\xCC",   # capital I, grave accent
  96     "igrave"    =>    "\xEC",   # small i, grave accent
  97     "Iuml"      =>    "\xCF",   # capital I, dieresis or umlaut mark
  98     "iuml"      =>    "\xEF",   # small i, dieresis or umlaut mark
  99     "Ntilde"    =>    "\xD1",   # capital N, tilde
 100     "ntilde"    =>    "\xF1",   # small n, tilde
 101     "Oacute"    =>    "\xD3",   # capital O, acute accent
 102     "oacute"    =>    "\xF3",   # small o, acute accent
 103     "Ocirc"     =>    "\xD4",   # capital O, circumflex accent
 104     "ocirc"     =>    "\xF4",   # small o, circumflex accent
 105     "Ograve"    =>    "\xD2",   # capital O, grave accent
 106     "ograve"    =>    "\xF2",   # small o, grave accent
 107     "Oslash"    =>    "\xD8",   # capital O, slash
 108     "oslash"    =>    "\xF8",   # small o, slash
 109     "Otilde"    =>    "\xD5",   # capital O, tilde
 110     "otilde"    =>    "\xF5",   # small o, tilde
 111     "Ouml"      =>    "\xD6",   # capital O, dieresis or umlaut mark
 112     "ouml"      =>    "\xF6",   # small o, dieresis or umlaut mark
 113     "szlig"     =>    "\xDF",   # small sharp s, German (sz ligature)
 114     "THORN"     =>    "\xDE",   # capital THORN, Icelandic
 115     "thorn"     =>    "\xFE",   # small thorn, Icelandic
 116     "Uacute"    =>    "\xDA",   # capital U, acute accent
 117     "uacute"    =>    "\xFA",   # small u, acute accent
 118     "Ucirc"     =>    "\xDB",   # capital U, circumflex accent
 119     "ucirc"     =>    "\xFB",   # small u, circumflex accent
 120     "Ugrave"    =>    "\xD9",   # capital U, grave accent
 121     "ugrave"    =>    "\xF9",   # small u, grave accent
 122     "Uuml"      =>    "\xDC",   # capital U, dieresis or umlaut mark
 123     "uuml"      =>    "\xFC",   # small u, dieresis or umlaut mark
 124     "Yacute"    =>    "\xDD",   # capital Y, acute accent
 125     "yacute"    =>    "\xFD",   # small y, acute accent
 126     "yuml"      =>    "\xFF",   # small y, dieresis or umlaut mark
 127
 128     "laquo"     =>    "\xAB",   # left pointing double angle quotation mark
 129     "lchevron"  =>    "\xAB",   #  synonym (backwards compatibility)
 130     "raquo"     =>    "\xBB",   # right pointing double angle quotation mark
 131     "rchevron"  =>    "\xBB",   #  synonym (backwards compatibility)
 132
 133     "iexcl"     =>    "\xA1",   # inverted exclamation mark
 134     "cent"      =>    "\xA2",   # cent sign
 135     "pound"     =>    "\xA3",   # (UK) pound sign
 136     "curren"    =>    "\xA4",   # currency sign
 137     "yen"       =>    "\xA5",   # yen sign
 138     "brvbar"    =>    "\xA6",   # broken vertical bar
 139     "sect"      =>    "\xA7",   # section sign
 140     "uml"       =>    "\xA8",   # diaresis
 141     "copy"      =>    "\xA9",   # Copyright symbol
 142     "ordf"      =>    "\xAA",   # feminine ordinal indicator
 143     "not"       =>    "\xAC",   # not sign
 144     "shy"       =>    '',       # soft (discretionary) hyphen
 145     "reg"       =>    "\xAE",   # registered trademark
 146     "macr"      =>    "\xAF",   # macron, overline
 147     "deg"       =>    "\xB0",   # degree sign
 148     "plusmn"    =>    "\xB1",   # plus-minus sign
 149     "sup2"      =>    "\xB2",   # superscript 2
 150     "sup3"      =>    "\xB3",   # superscript 3
 151     "acute"     =>    "\xB4",   # acute accent
 152     "micro"     =>    "\xB5",   # micro sign
 153     "para"      =>    "\xB6",   # pilcrow sign = paragraph sign
 154     "middot"    =>    "\xB7",   # middle dot = Georgian comma
 155     "cedil"     =>    "\xB8",   # cedilla
 156     "sup1"      =>    "\xB9",   # superscript 1
 157     "ordm"      =>    "\xBA",   # masculine ordinal indicator
 158     "frac14"    =>    "\xBC",   # vulgar fraction one quarter
 159     "frac12"    =>    "\xBD",   # vulgar fraction one half
 160     "frac34"    =>    "\xBE",   # vulgar fraction three quarters
 161     "iquest"    =>    "\xBF",   # inverted question mark
 162     "times"     =>    "\xD7",   # multiplication sign
 163     "divide"    =>    "\xF7",   # division sign
 164
 165     "nbsp"      =>    "\x01",   # non-breaking space
 166 );
 167
 168
 169 ##############################################################################
 170 # Initialization
 171 ##############################################################################
 172
 173 # Initialize the object.  Must be sure to call our parent initializer.
 174 sub initialize {
 175     my $self = shift;
 176
 177     $$self{alt}      = 0  unless defined $$self{alt};
 178     $$self{indent}   = 4  unless defined $$self{indent};
 179     $$self{loose}    = 0  unless defined $$self{loose};
 180     $$self{sentence} = 0  unless defined $$self{sentence};
 181     $$self{width}    = 76 unless defined $$self{width};
 182
 183     # Figure out what quotes we'll be using for C<> text.
 184     $$self{quotes} ||= '"';
 185     if ($$self{quotes} eq 'none') {
 186         $$self{LQUOTE} = $$self{RQUOTE} = '';
 187     } elsif (length ($$self{quotes}) == 1) {
 188         $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes};
 189     } elsif ($$self{quotes} =~ /^(.)(.)$/
 190              || $$self{quotes} =~ /^(..)(..)$/) {
 191         $$self{LQUOTE} = $1;
 192         $$self{RQUOTE} = $2;
 193     } else {
 194         croak qq(Invalid quote specification "$$self{quotes}");
 195     }
 196
 197     $$self{INDENTS}  = [];              # Stack of indentations.
 198     $$self{MARGIN}   = $$self{indent};  # Current left margin in spaces.
 199
 200     $self->SUPER::initialize;
 201
 202     # Tell Pod::Parser that we want the non-POD stuff too if code was set.
 203     $self->parseopts ('-want_nonPODs' => 1) if $$self{code};
 204 }
 205
 206
 207 ##############################################################################
 208 # Core overrides
 209 ##############################################################################
 210
 211 # Called for each command paragraph.  Gets the command, the associated
 212 # paragraph, the line number, and a Pod::Paragraph object.  Just dispatches
 213 # the command to a method named the same as the command.  =cut is handled
 214 # internally by Pod::Parser.
 215 sub command {
 216     my $self = shift;
 217     my $command = shift;
 218     return if $command eq 'pod';
 219     return if ($$self{EXCLUDE} && $command ne 'end');
 220     if ($self->can ('cmd_' . $command)) {
 221         $command = 'cmd_' . $command;
 222         $self->$command (@_);
 223     } else {
 224         my ($text, $line, $paragraph) = @_;
 225         my $file;
 226         ($file, $line) = $paragraph->file_line;
 227         $text =~ s/\n+\z//;
 228         $text = " $text" if ($text =~ /^\S/);
 229         warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
 230         return;
 231     }
 232 }
 233
 234 # Called for a verbatim paragraph.  Gets the paragraph, the line number, and a
 235 # Pod::Paragraph object.  Just output it verbatim, but with tabs converted to
 236 # spaces.
 237 sub verbatim {
 238     my $self = shift;
 239     return if $$self{EXCLUDE};
 240     $self->item if defined $$self{ITEM};
 241     local $_ = shift;
 242     return if /^\s*$/;
 243     s/^(\s*\S+)/(' ' x $$self{MARGIN}) . $1/gme;
 244     $self->output ($_);
 245 }
 246
 247 # Called for a regular text block.  Gets the paragraph, the line number, and a
 248 # Pod::Paragraph object.  Perform interpolation and output the results.
 249 sub textblock {
 250     my $self = shift;
 251     return if $$self{EXCLUDE};
 252     $self->output ($_[0]), return if $$self{VERBATIM};
 253     local $_ = shift;
 254     my $line = shift;
 255
 256     # Interpolate and output the paragraph.
 257     $_ = $self->interpolate ($_, $line);
 258     s/\s+$/\n/;
 259     if (defined $$self{ITEM}) {
 260         $self->item ($_ . "\n");
 261     } else {
 262         $self->output ($self->reformat ($_ . "\n"));
 263     }
 264 }
 265
 266 # Called for an interior sequence.  Gets the command, argument, and a
 267 # Pod::InteriorSequence object and is expected to return the resulting text.
 268 # Calls code, bold, italic, file, and link to handle those types of sequences,
 269 # and handles S<>, E<>, X<>, and Z<> directly.
 270 sub interior_sequence {
 271     local $_;
 272     my ($self, $command, $seq);
 273     ($self, $command, $_, $seq) = @_;
 274
 275     # We have to defer processing of the inside of an L<> formatting code.  If
 276     # this sequence is nested inside an L<> sequence, return the literal raw
 277     # text of it.
 278     my $parent = $seq->nested;
 279     while (defined $parent) {
 280         return $seq->raw_text if ($parent->cmd_name eq 'L');
 281         $parent = $parent->nested;
 282     }
 283
 284     # Index entries are ignored in plain text.
 285     return '' if ($command eq 'X' || $command eq 'Z');
 286
 287     # Expand escapes into the actual character now, warning if invalid.
 288     if ($command eq 'E') {
 289         if (/^\d+$/) {
 290             return chr;
 291         } else {
 292             return $ESCAPES{$_} if defined $ESCAPES{$_};
 293             my $seq = shift;
 294             my ($file, $line) = $seq->file_line;
 295             warn "$file:$line: Unknown escape: E<$_>\n";
 296             return "E<$_>";
 297         }
 298     }
 299
 300     # For all the other sequences, empty content produces no output.
 301     return if $_ eq '';
 302
 303     # For S<>, compress all internal whitespace and then map spaces to \01.
 304     # When we output the text, we'll map this back.
 305     if ($command eq 'S') {
 306         s/\s+/ /g;
 307         tr/ /\01/;
 308         return $_;
 309     }
 310
 311     # Anything else needs to get dispatched to another method.
 312     if    ($command eq 'B') { return $self->seq_b ($_) }
 313     elsif ($command eq 'C') { return $self->seq_c ($_) }
 314     elsif ($command eq 'F') { return $self->seq_f ($_) }
 315     elsif ($command eq 'I') { return $self->seq_i ($_) }
 316     elsif ($command eq 'L') { return $self->seq_l ($_, $seq) }
 317     else {
 318         my $seq = shift;
 319         my ($file, $line) = $seq->file_line;
 320         warn "$file:$line: Unknown sequence $command<$_>\n";
 321     }
 322 }
 323
 324 # Called for each paragraph that's actually part of the POD.  We take
 325 # advantage of this opportunity to untabify the input.  Also, if given the
 326 # code option, we may see paragraphs that aren't part of the POD and need to
 327 # output them directly.
 328 sub preprocess_paragraph {
 329     my $self = shift;
 330     local $_ = shift;
 331     1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
 332     $self->output_code ($_) if $self->cutting;
 333     $_;
 334 }
 335
 336
 337 ##############################################################################
 338 # Command paragraphs
 339 ##############################################################################
 340
 341 # All command paragraphs take the paragraph and the line number.
 342
 343 # First level heading.
 344 sub cmd_head1 {
 345     my ($self, $text, $line) = @_;
 346     $self->heading ($text, $line, 0, '====');
 347 }
 348
 349 # Second level heading.
 350 sub cmd_head2 {
 351     my ($self, $text, $line) = @_;
 352     $self->heading ($text, $line, $$self{indent} / 2, '==  ');
 353 }
 354
 355 # Third level heading.
 356 sub cmd_head3 {
 357     my ($self, $text, $line) = @_;
 358     $self->heading ($text, $line, $$self{indent} * 2 / 3 + 0.5, '=   ');
 359 }
 360
 361 # Third level heading.
 362 sub cmd_head4 {
 363     my ($self, $text, $line) = @_;
 364     $self->heading ($text, $line, $$self{indent} * 3 / 4 + 0.5, '-   ');
 365 }
 366
 367 # Start a list.
 368 sub cmd_over {
 369     my $self = shift;
 370     local $_ = shift;
 371     $self->item ("\n\n") if defined $$self{ITEM};
 372     unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
 373     push (@{ $$self{INDENTS} }, $$self{MARGIN});
 374     $$self{MARGIN} += ($_ + 0);
 375 }
 376
 377 # End a list.
 378 sub cmd_back {
 379     my ($self, $text, $line, $paragraph) = @_;
 380     $self->item ("\n\n") if defined $$self{ITEM};
 381     $$self{MARGIN} = pop @{ $$self{INDENTS} };
 382     unless (defined $$self{MARGIN}) {
 383         my $file;
 384         ($file, $line) = $paragraph->file_line;
 385         warn "$file:$line: Unmatched =back\n";
 386         $$self{MARGIN} = $$self{indent};
 387     }
 388 }
 389
 390 # An individual list item.
 391 sub cmd_item {
 392     my $self = shift;
 393     if (defined $$self{ITEM}) { $self->item }
 394     local $_ = shift;
 395     s/\s+$//;
 396     $$self{ITEM} = $_ ? $self->interpolate ($_) : '*';
 397 }
 398
 399 # Begin a block for a particular translator.  Setting VERBATIM triggers
 400 # special handling in textblock().
 401 sub cmd_begin {
 402     my $self = shift;
 403     local $_ = shift;
 404     my ($kind) = /^(\S+)/ or return;
 405     if ($kind eq 'text') {
 406         $$self{VERBATIM} = 1;
 407     } else {
 408         $$self{EXCLUDE} = 1;
 409     }
 410 }
 411
 412 # End a block for a particular translator.  We assume that all =begin/=end
 413 # pairs are properly closed.
 414 sub cmd_end {
 415     my $self = shift;
 416     $$self{EXCLUDE} = 0;
 417     $$self{VERBATIM} = 0;
 418 }
 419
 420 # One paragraph for a particular translator.  Ignore it unless it's intended
 421 # for text, in which case we treat it as a verbatim text block.
 422 sub cmd_for {
 423     my $self = shift;
 424     local $_ = shift;
 425     my $line = shift;
 426     return unless s/^text\b[ \t]*\n?//;
 427     $self->verbatim ($_, $line);
 428 }
 429
 430
 431 ##############################################################################
 432 # Interior sequences
 433 ##############################################################################
 434
 435 # The simple formatting ones.  These are here mostly so that subclasses can
 436 # override them and do more complicated things.
 437 sub seq_b { return $_[0]{alt} ? "``$_[1]''" : $_[1] }
 438 sub seq_f { return $_[0]{alt} ? "\"$_[1]\"" : $_[1] }
 439 sub seq_i { return '*' . $_[1] . '*' }
 440
 441 # Apply a whole bunch of messy heuristics to not quote things that don't
 442 # benefit from being quoted.  These originally come from Barrie Slaymaker and
 443 # largely duplicate code in Pod::Man.
 444 sub seq_c {
 445     my $self = shift;
 446     local $_ = shift;
 447
 448     # A regex that matches the portion of a variable reference that's the
 449     # array or hash index, separated out just because we want to use it in
 450     # several places in the following regex.
 451     my $index = '(?: \[.*\] | \{.*\} )?';
 452
 453     # Check for things that we don't want to quote, and if we find any of
 454     # them, return the string with just a font change and no quoting.
 455     m{
 456       ^\s*
 457       (?:
 458          ( [\'\`\"] ) .* \1                             # already quoted
 459        | \` .* \'                                       # `quoted'
 460        | \$+ [\#^]? \S $index                           # special ($^Foo, $")
 461        | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
 462        | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
 463        | [+-]? [\d.]+ (?: [eE] [+-]? \d+ )?             # a number
 464        | 0x [a-fA-F\d]+                                 # a hex constant
 465       )
 466       \s*\z
 467      }xo && return $_;
 468
 469     # If we didn't return, go ahead and quote the text.
 470     return $$self{alt} ? "``$_''" : "$$self{LQUOTE}$_$$self{RQUOTE}";
 471 }
 472
 473 # Handle links.  Since this is plain text, we can't actually make any real
 474 # links, so this is all to figure out what text we print out.  Most of the
 475 # work is done by Pod::ParseLink.
 476 sub seq_l {
 477     my ($self, $link, $seq) = @_;
 478     my ($text, $type) = (parselink ($link))[1,4];
 479     my ($file, $line) = $seq->file_line;
 480     $text = $self->interpolate ($text, $line);
 481     $text = '<' . $text . '>' if $type eq 'url';
 482     return $text || '';
 483 }
 484
 485
 486 ##############################################################################
 487 # Header handling
 488 ##############################################################################
 489
 490 # The common code for handling all headers.  Takes the interpolated header
 491 # text, the line number, the indentation, and the surrounding marker for the
 492 # alt formatting method.
 493 sub heading {
 494     my ($self, $text, $line, $indent, $marker) = @_;
 495     $self->item ("\n\n") if defined $$self{ITEM};
 496     $text =~ s/\s+$//;
 497     $text = $self->interpolate ($text, $line);
 498     if ($$self{alt}) {
 499         my $closemark = reverse (split (//, $marker));
 500         $self->output ("\n" . "$marker $text $closemark" . "\n\n");
 501     } else {
 502         $text .= "\n" if $$self{loose};
 503         $self->output (' ' x $indent . $text . "\n");
 504     }
 505 }
 506
 507
 508 ##############################################################################
 509 # List handling
 510 ##############################################################################
 511
 512 # This method is called whenever an =item command is complete (in other words,
 513 # we've seen its associated paragraph or know for certain that it doesn't have
 514 # one).  It gets the paragraph associated with the item as an argument.  If
 515 # that argument is empty, just output the item tag; if it contains a newline,
 516 # output the item tag followed by the newline.  Otherwise, see if there's
 517 # enough room for us to output the item tag in the margin of the text or if we
 518 # have to put it on a separate line.
 519 sub item {
 520     my $self = shift;
 521     local $_ = shift;
 522     my $tag = $$self{ITEM};
 523     unless (defined $tag) {
 524         carp "Item called without tag";
 525         return;
 526     }
 527     undef $$self{ITEM};
 528     my $indent = $$self{INDENTS}[-1];
 529     unless (defined $indent) { $indent = $$self{indent} }
 530     my $space = ' ' x $indent;
 531     $space =~ s/^ /:/ if $$self{alt};
 532     if (!$_ || /^\s+$/ || ($$self{MARGIN} - $indent < length ($tag) + 1)) {
 533         my $margin = $$self{MARGIN};
 534         $$self{MARGIN} = $indent;
 535         my $output = $self->reformat ($tag);
 536         $output =~ s/\n*$/\n/;
 537
 538         # If the text is just whitespace, we have an empty item paragraph;
 539         # this can result from =over/=item/=back without any intermixed
 540         # paragraphs.  Insert some whitespace to keep the =item from merging
 541         # into the next paragraph.
 542         $output .= "\n" if $_ && $_ =~ /^\s*$/;
 543
 544         $self->output ($output);
 545         $$self{MARGIN} = $margin;
 546         $self->output ($self->reformat ($_)) if $_ && /\S/;
 547     } else {
 548         $_ = $self->reformat ($_);
 549         s/^ /:/ if ($$self{alt} && $indent > 0);
 550         my $tagspace = ' ' x length $tag;
 551         s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
 552         $self->output ($_);
 553     }
 554 }
 555
 556
 557 ##############################################################################
 558 # Output formatting
 559 ##############################################################################
 560
 561 # Wrap a line, indenting by the current left margin.  We can't use Text::Wrap
 562 # because it plays games with tabs.  We can't use formline, even though we'd
 563 # really like to, because it screws up non-printing characters.  So we have to
 564 # do the wrapping ourselves.
 565 sub wrap {
 566     my $self = shift;
 567     local $_ = shift;
 568     my $output = '';
 569     my $spaces = ' ' x $$self{MARGIN};
 570     my $width = $$self{width} - $$self{MARGIN};
 571     while (length > $width) {
 572         if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
 573             $output .= $spaces . $1 . "\n";
 574         } else {
 575             last;
 576         }
 577     }
 578     $output .= $spaces . $_;
 579     $output =~ s/\s+$/\n\n/;
 580     $output;
 581 }
 582
 583 # Reformat a paragraph of text for the current margin.  Takes the text to
 584 # reformat and returns the formatted text.
 585 sub reformat {
 586     my $self = shift;
 587     local $_ = shift;
 588
 589     # If we're trying to preserve two spaces after sentences, do some munging
 590     # to support that.  Otherwise, smash all repeated whitespace.
 591     if ($$self{sentence}) {
 592         s/ +$//mg;
 593         s/\.\n/. \n/g;
 594         s/\n/ /g;
 595         s/   +/  /g;
 596     } else {
 597         s/\s+/ /g;
 598     }
 599     $self->wrap ($_);
 600 }
 601
 602 # Output text to the output device.
 603 sub output { $_[1] =~ tr/\01/ /; print { $_[0]->output_handle } $_[1] }
 604
 605 # Output a block of code (something that isn't part of the POD text).  Called
 606 # by preprocess_paragraph only if we were given the code option.  Exists here
 607 # only so that it can be overridden by subclasses.
 608 sub output_code { $_[0]->output ($_[1]) }
 609
 610
 611 ##############################################################################
 612 # Backwards compatibility
 613 ##############################################################################
 614
 615 # The old Pod::Text module did everything in a pod2text() function.  This
 616 # tries to provide the same interface for legacy applications.
 617 sub pod2text {
 618     my @args;
 619
 620     # This is really ugly; I hate doing option parsing in the middle of a
 621     # module.  But the old Pod::Text module supported passing flags to its
 622     # entry function, so handle -a and -<number>.
 623     while ($_[0] =~ /^-/) {
 624         my $flag = shift;
 625         if    ($flag eq '-a')       { push (@args, alt => 1)    }
 626         elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
 627         else {
 628             unshift (@_, $flag);
 629             last;
 630         }
 631     }
 632
 633     # Now that we know what arguments we're using, create the parser.
 634     my $parser = Pod::Text->new (@args);
 635
 636     # If two arguments were given, the second argument is going to be a file
 637     # handle.  That means we want to call parse_from_filehandle(), which means
 638     # we need to turn the first argument into a file handle.  Magic open will
 639     # handle the <&STDIN case automagically.
 640     if (defined $_[1]) {
 641         my @fhs = @_;
 642         local *IN;
 643         unless (open (IN, $fhs[0])) {
 644             croak ("Can't open $fhs[0] for reading: $!\n");
 645             return;
 646         }
 647         $fhs[0] = \*IN;
 648         return $parser->parse_from_filehandle (@fhs);
 649     } else {
 650         return $parser->parse_from_file (@_);
 651     }
 652 }
 653
 654
 655 ##############################################################################
 656 # Module return value and documentation
 657 ##############################################################################
 658
 659 1;
 660 __END__
 661
 662 =head1 NAME
 663
 664 Pod::Text - Convert POD data to formatted ASCII text
 665
 666 =head1 SYNOPSIS
 667
 668     use Pod::Text;
 669     my $parser = Pod::Text->new (sentence => 0, width => 78);
 670
 671     # Read POD from STDIN and write to STDOUT.
 672     $parser->parse_from_filehandle;
 673
 674     # Read POD from file.pod and write to file.txt.
 675     $parser->parse_from_file ('file.pod', 'file.txt');
 676
 677 =head1 DESCRIPTION
 678
 679 Pod::Text is a module that can convert documentation in the POD format (the
 680 preferred language for documenting Perl) into formatted ASCII.  It uses no
 681 special formatting controls or codes whatsoever, and its output is therefore
 682 suitable for nearly any device.
 683
 684 As a derived class from Pod::Parser, Pod::Text supports the same methods and
 685 interfaces.  See L<Pod::Parser> for all the details; briefly, one creates a
 686 new parser with C<< Pod::Text->new() >> and then calls either
 687 parse_from_filehandle() or parse_from_file().
 688
 689 new() can take options, in the form of key/value pairs, that control the
 690 behavior of the parser.  The currently recognized options are:
 691
 692 =over 4
 693
 694 =item alt
 695
 696 If set to a true value, selects an alternate output format that, among other
 697 things, uses a different heading style and marks C<=item> entries with a
 698 colon in the left margin.  Defaults to false.
 699
 700 =item code
 701
 702 If set to a true value, the non-POD parts of the input file will be included
 703 in the output.  Useful for viewing code documented with POD blocks with the
 704 POD rendered and the code left intact.
 705
 706 =item indent
 707
 708 The number of spaces to indent regular text, and the default indentation for
 709 C<=over> blocks.  Defaults to 4.
 710
 711 =item loose
 712
 713 If set to a true value, a blank line is printed after a C<=head1> heading.
 714 If set to false (the default), no blank line is printed after C<=head1>,
 715 although one is still printed after C<=head2>.  This is the default because
 716 it's the expected formatting for manual pages; if you're formatting
 717 arbitrary text documents, setting this to true may result in more pleasing
 718 output.
 719
 720 =item quotes
 721
 722 Sets the quote marks used to surround CE<lt>> text.  If the value is a
 723 single character, it is used as both the left and right quote; if it is two
 724 characters, the first character is used as the left quote and the second as
  725 the right quote; and if it is four characters, the first two are used as
 726 the left quote and the second two as the right quote.
 727
 728 This may also be set to the special value C<none>, in which case no quote
 729 marks are added around CE<lt>> text.
 730
 731 =item sentence
 732
 733 If set to a true value, Pod::Text will assume that each sentence ends in two
 734 spaces, and will try to preserve that spacing.  If set to false, all
 735 consecutive whitespace in non-verbatim paragraphs is compressed into a
  736 single space.  Defaults to false.
 737
 738 =item width
 739
 740 The column at which to wrap text on the right-hand side.  Defaults to 76.
 741
 742 =back
 743
 744 The standard Pod::Parser method parse_from_filehandle() takes up to two
 745 arguments, the first being the file handle to read POD from and the second
 746 being the file handle to write the formatted output to.  The first defaults
 747 to STDIN if not given, and the second defaults to STDOUT.  The method
 748 parse_from_file() is almost identical, except that its two arguments are the
 749 input and output disk files instead.  See L<Pod::Parser> for the specific
 750 details.
 751
 752 =head1 DIAGNOSTICS
 753
 754 =over 4
 755
 756 =item Bizarre space in item
 757
 758 =item Item called without tag
 759
 760 (W) Something has gone wrong in internal C<=item> processing.  These
 761 messages indicate a bug in Pod::Text; you should never see them.
 762
 763 =item Can't open %s for reading: %s
 764
 765 (F) Pod::Text was invoked via the compatibility mode pod2text() interface
 766 and the input file it was given could not be opened.
 767
 768 =item Invalid quote specification "%s"
 769
 770 (F) The quote specification given (the quotes option to the constructor) was
 771 invalid.  A quote specification must be one, two, or four characters long.
 772
  773 =item %s:%d: Unknown command paragraph "%s"
 774
 775 (W) The POD source contained a non-standard command paragraph (something of
  776 the form C<=command args>) that Pod::Text didn't know about.  It was ignored.
 777
 778 =item %s:%d: Unknown escape: %s
 779
 780 (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
 781 know about.
 782
  783 =item %s:%d: Unknown sequence %s
 784
 785 (W) The POD source contained a non-standard internal sequence (something of
 786 the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
 787
 788 =item %s:%d: Unmatched =back
 789
 790 (W) Pod::Text encountered a C<=back> command that didn't correspond to an
 791 C<=over> command.
 792
 793 =back
 794
 795 =head1 RESTRICTIONS
 796
 797 Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
 798 output, due to an internal implementation detail.
 799
 800 =head1 NOTES
 801
 802 This is a replacement for an earlier Pod::Text module written by Tom
 803 Christiansen.  It has a revamped interface, since it now uses Pod::Parser,
 804 but an interface roughly compatible with the old Pod::Text::pod2text()
 805 function is still available.  Please change to the new calling convention,
 806 though.
 807
 808 The original Pod::Text contained code to do formatting via termcap
 809 sequences, although it wasn't turned on by default and it was problematic to
 810 get it to work at all.  This rewrite doesn't even try to do that, but a
 811 subclass of it does.  Look for L<Pod::Text::Termcap>.
 812
 813 =head1 SEE ALSO
 814
 815 L<Pod::Parser>, L<Pod::Text::Termcap>, L<pod2text(1)>
 816
 817 =head1 AUTHOR
 818
 819 Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
 820 Pod::Text by Tom Christiansen <tchrist@mox.perl.com> and its conversion to
 821 Pod::Parser by Brad Appleton <bradapp@enteract.com>.
 822
 823 =head1 COPYRIGHT AND LICENSE
 824
 825 Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>.
 826
 827 This program is free software; you may redistribute it and/or modify it
 828 under the same terms as Perl itself.
 829
 830 =cut