lib/Pod/Text.pm

   1 # Pod::Text -- Convert POD data to formatted ASCII text.
   2 # $Id: Text.pm,v 2.14 2001/11/15 08:03:18 eagle Exp $
   3 #
   4 # Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>
   5 #
   6 # This program is free software; you may redistribute it and/or modify it
   7 # under the same terms as Perl itself.
   8 #
   9 # This module replaces the old Pod::Text that came with versions of Perl prior
  10 # to 5.6.0, and attempts to match its output except for some specific
  11 # circumstances where other decisions seemed to produce better output.  It
  12 # uses Pod::Parser and is designed to be very easy to subclass.
  13 #
  14 # Perl core hackers, please note that this module is also separately
  15 # maintained outside of the Perl core as part of the podlators.  Please send
  16 # me any patches at the address above in addition to sending them to the
  17 # standard Perl mailing lists.
  18
  19 ##############################################################################
  20 # Modules and declarations
  21 ##############################################################################
  22
  23 package Pod::Text;
  24
  25 require 5.004;
  26
  27 use Carp qw(carp croak);
  28 use Exporter ();
  29 use Pod::ParseLink qw(parselink);
  30 use Pod::Select ();
  31
  32 use strict;
  33 use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
  34
  35 # We inherit from Pod::Select instead of Pod::Parser so that we can be used by
  36 # Pod::Usage.
  37 @ISA = qw(Pod::Select Exporter);
  38
  39 # We have to export pod2text for backward compatibility.
  40 @EXPORT = qw(pod2text);
  41
  42 # Don't use the CVS revision as the version, since this module is also in Perl
  43 # core and too many things could munge CVS magic revision strings.  This
  44 # number should ideally be the same as the CVS revision in podlators, however.
  45 $VERSION = 2.14;
  46
  47
  48 ##############################################################################
  49 # Table of supported E<> escapes
  50 ##############################################################################
  51
# This table is taken near verbatim from Pod::PlainText in Pod::Parser, which
# got it near verbatim from the original Pod::Text.  It is therefore credited
# to Tom Christiansen, and I'm glad I didn't have to write it.  :)  "iexcl" to
# "divide" added by Tim Jenness.
#
# The keys are the names accepted inside E<>, and the values are the text
# that interior_sequence() substitutes for them.  Apart from the plain ASCII
# punctuation at the top, the values are single characters in the 0xA1-0xFF
# range.  Two entries are special: "shy" expands to nothing at all, and "nbsp"
# expands to the \x01 placeholder that output() turns back into a space.
%ESCAPES = (
    'amp'       =>    '&',      # ampersand
    'apos'      =>    "'",      # apostrophe
    'lt'        =>    '<',      # left chevron, less-than
    'gt'        =>    '>',      # right chevron, greater-than
    'quot'      =>    '"',      # double quote
    'sol'       =>    '/',      # solidus (forward slash)
    'verbar'    =>    '|',      # vertical bar

    "Aacute"    =>    "\xC1",   # capital A, acute accent
    "aacute"    =>    "\xE1",   # small a, acute accent
    "Acirc"     =>    "\xC2",   # capital A, circumflex accent
    "acirc"     =>    "\xE2",   # small a, circumflex accent
    "AElig"     =>    "\xC6",   # capital AE diphthong (ligature)
    "aelig"     =>    "\xE6",   # small ae diphthong (ligature)
    "Agrave"    =>    "\xC0",   # capital A, grave accent
    "agrave"    =>    "\xE0",   # small a, grave accent
    "Aring"     =>    "\xC5",   # capital A, ring
    "aring"     =>    "\xE5",   # small a, ring
    "Atilde"    =>    "\xC3",   # capital A, tilde
    "atilde"    =>    "\xE3",   # small a, tilde
    "Auml"      =>    "\xC4",   # capital A, dieresis or umlaut mark
    "auml"      =>    "\xE4",   # small a, dieresis or umlaut mark
    "Ccedil"    =>    "\xC7",   # capital C, cedilla
    "ccedil"    =>    "\xE7",   # small c, cedilla
    "Eacute"    =>    "\xC9",   # capital E, acute accent
    "eacute"    =>    "\xE9",   # small e, acute accent
    "Ecirc"     =>    "\xCA",   # capital E, circumflex accent
    "ecirc"     =>    "\xEA",   # small e, circumflex accent
    "Egrave"    =>    "\xC8",   # capital E, grave accent
    "egrave"    =>    "\xE8",   # small e, grave accent
    "ETH"       =>    "\xD0",   # capital Eth, Icelandic
    "eth"       =>    "\xF0",   # small eth, Icelandic
    "Euml"      =>    "\xCB",   # capital E, dieresis or umlaut mark
    "euml"      =>    "\xEB",   # small e, dieresis or umlaut mark
    "Iacute"    =>    "\xCD",   # capital I, acute accent
    "iacute"    =>    "\xED",   # small i, acute accent
    "Icirc"     =>    "\xCE",   # capital I, circumflex accent
    "icirc"     =>    "\xEE",   # small i, circumflex accent
    "Igrave"    =>    "\xCC",   # capital I, grave accent
    "igrave"    =>    "\xEC",   # small i, grave accent
    "Iuml"      =>    "\xCF",   # capital I, dieresis or umlaut mark
    "iuml"      =>    "\xEF",   # small i, dieresis or umlaut mark
    "Ntilde"    =>    "\xD1",   # capital N, tilde
    "ntilde"    =>    "\xF1",   # small n, tilde
    "Oacute"    =>    "\xD3",   # capital O, acute accent
    "oacute"    =>    "\xF3",   # small o, acute accent
    "Ocirc"     =>    "\xD4",   # capital O, circumflex accent
    "ocirc"     =>    "\xF4",   # small o, circumflex accent
    "Ograve"    =>    "\xD2",   # capital O, grave accent
    "ograve"    =>    "\xF2",   # small o, grave accent
    "Oslash"    =>    "\xD8",   # capital O, slash
    "oslash"    =>    "\xF8",   # small o, slash
    "Otilde"    =>    "\xD5",   # capital O, tilde
    "otilde"    =>    "\xF5",   # small o, tilde
    "Ouml"      =>    "\xD6",   # capital O, dieresis or umlaut mark
    "ouml"      =>    "\xF6",   # small o, dieresis or umlaut mark
    "szlig"     =>    "\xDF",   # small sharp s, German (sz ligature)
    "THORN"     =>    "\xDE",   # capital THORN, Icelandic
    "thorn"     =>    "\xFE",   # small thorn, Icelandic
    "Uacute"    =>    "\xDA",   # capital U, acute accent
    "uacute"    =>    "\xFA",   # small u, acute accent
    "Ucirc"     =>    "\xDB",   # capital U, circumflex accent
    "ucirc"     =>    "\xFB",   # small u, circumflex accent
    "Ugrave"    =>    "\xD9",   # capital U, grave accent
    "ugrave"    =>    "\xF9",   # small u, grave accent
    "Uuml"      =>    "\xDC",   # capital U, dieresis or umlaut mark
    "uuml"      =>    "\xFC",   # small u, dieresis or umlaut mark
    "Yacute"    =>    "\xDD",   # capital Y, acute accent
    "yacute"    =>    "\xFD",   # small y, acute accent
    "yuml"      =>    "\xFF",   # small y, dieresis or umlaut mark

    "laquo"     =>    "\xAB",   # left pointing double angle quotation mark
    "lchevron"  =>    "\xAB",   #  synonym (backwards compatibility)
    "raquo"     =>    "\xBB",   # right pointing double angle quotation mark
    "rchevron"  =>    "\xBB",   #  synonym (backwards compatibility)

    "iexcl"     =>    "\xA1",   # inverted exclamation mark
    "cent"      =>    "\xA2",   # cent sign
    "pound"     =>    "\xA3",   # (UK) pound sign
    "curren"    =>    "\xA4",   # currency sign
    "yen"       =>    "\xA5",   # yen sign
    "brvbar"    =>    "\xA6",   # broken vertical bar
    "sect"      =>    "\xA7",   # section sign
    "uml"       =>    "\xA8",   # dieresis
    "copy"      =>    "\xA9",   # Copyright symbol
    "ordf"      =>    "\xAA",   # feminine ordinal indicator
    "not"       =>    "\xAC",   # not sign
    "shy"       =>    '',       # soft (discretionary) hyphen
    "reg"       =>    "\xAE",   # registered trademark
    "macr"      =>    "\xAF",   # macron, overline
    "deg"       =>    "\xB0",   # degree sign
    "plusmn"    =>    "\xB1",   # plus-minus sign
    "sup2"      =>    "\xB2",   # superscript 2
    "sup3"      =>    "\xB3",   # superscript 3
    "acute"     =>    "\xB4",   # acute accent
    "micro"     =>    "\xB5",   # micro sign
    "para"      =>    "\xB6",   # pilcrow sign = paragraph sign
    "middot"    =>    "\xB7",   # middle dot = Georgian comma
    "cedil"     =>    "\xB8",   # cedilla
    "sup1"      =>    "\xB9",   # superscript 1
    "ordm"      =>    "\xBA",   # masculine ordinal indicator
    "frac14"    =>    "\xBC",   # vulgar fraction one quarter
    "frac12"    =>    "\xBD",   # vulgar fraction one half
    "frac34"    =>    "\xBE",   # vulgar fraction three quarters
    "iquest"    =>    "\xBF",   # inverted question mark
    "times"     =>    "\xD7",   # multiplication sign
    "divide"    =>    "\xF7",   # division sign

    # Not a real space: \x01 is not matched by \s, so reformat() and wrap()
    # neither squeeze it nor break a line on it.  output() maps it to a space.
    "nbsp"      =>    "\x01",   # non-breaking space
);
 167
 168
 169 ##############################################################################
 170 # Initialization
 171 ##############################################################################
 172
 173 # Initialize the object.  Must be sure to call our parent initializer.
 174 sub initialize {
 175     my $self = shift;
 176
 177     $$self{alt}      = 0  unless defined $$self{alt};
 178     $$self{indent}   = 4  unless defined $$self{indent};
 179     $$self{loose}    = 0  unless defined $$self{loose};
 180     $$self{sentence} = 0  unless defined $$self{sentence};
 181     $$self{width}    = 76 unless defined $$self{width};
 182
 183     # Figure out what quotes we'll be using for C<> text.
 184     $$self{quotes} ||= '"';
 185     if ($$self{quotes} eq 'none') {
 186         $$self{LQUOTE} = $$self{RQUOTE} = '';
 187     } elsif (length ($$self{quotes}) == 1) {
 188         $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes};
 189     } elsif ($$self{quotes} =~ /^(.)(.)$/
 190              || $$self{quotes} =~ /^(..)(..)$/) {
 191         $$self{LQUOTE} = $1;
 192         $$self{RQUOTE} = $2;
 193     } else {
 194         croak qq(Invalid quote specification "$$self{quotes}");
 195     }
 196
 197     $$self{INDENTS}  = [];              # Stack of indentations.
 198     $$self{MARGIN}   = $$self{indent};  # Current left margin in spaces.
 199
 200     $self->SUPER::initialize;
 201
 202     # Tell Pod::Parser that we want the non-POD stuff too if code was set.
 203     $self->parseopts ('-want_nonPODs' => 1) if $$self{code};
 204 }
 205
 206
 207 ##############################################################################
 208 # Core overrides
 209 ##############################################################################
 210
 211 # Called for each command paragraph.  Gets the command, the associated
 212 # paragraph, the line number, and a Pod::Paragraph object.  Just dispatches
 213 # the command to a method named the same as the command.  =cut is handled
 214 # internally by Pod::Parser.
 215 sub command {
 216     my $self = shift;
 217     my $command = shift;
 218     return if $command eq 'pod';
 219     return if ($$self{EXCLUDE} && $command ne 'end');
 220     $self->item ("\n") if defined $$self{ITEM};
 221     if ($self->can ('cmd_' . $command)) {
 222         $command = 'cmd_' . $command;
 223         $self->$command (@_);
 224     } else {
 225         my ($text, $line, $paragraph) = @_;
 226         my $file;
 227         ($file, $line) = $paragraph->file_line;
 228         $text =~ s/\n+\z//;
 229         $text = " $text" if ($text =~ /^\S/);
 230         warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
 231         return;
 232     }
 233 }
 234
 235 # Called for a verbatim paragraph.  Gets the paragraph, the line number, and a
 236 # Pod::Paragraph object.  Just output it verbatim, but with tabs converted to
 237 # spaces.
 238 sub verbatim {
 239     my $self = shift;
 240     return if $$self{EXCLUDE};
 241     $self->item if defined $$self{ITEM};
 242     local $_ = shift;
 243     return if /^\s*$/;
 244     s/^(\s*\S+)/(' ' x $$self{MARGIN}) . $1/gme;
 245     $self->output ($_);
 246 }
 247
 248 # Called for a regular text block.  Gets the paragraph, the line number, and a
 249 # Pod::Paragraph object.  Perform interpolation and output the results.
 250 sub textblock {
 251     my $self = shift;
 252     return if $$self{EXCLUDE};
 253     $self->output ($_[0]), return if $$self{VERBATIM};
 254     local $_ = shift;
 255     my $line = shift;
 256
 257     # Interpolate and output the paragraph.
 258     $_ = $self->interpolate ($_, $line);
 259     s/\s+$/\n/;
 260     if (defined $$self{ITEM}) {
 261         $self->item ($_ . "\n");
 262     } else {
 263         $self->output ($self->reformat ($_ . "\n"));
 264     }
 265 }
 266
 267 # Called for an interior sequence.  Gets the command, argument, and a
 268 # Pod::InteriorSequence object and is expected to return the resulting text.
 269 # Calls code, bold, italic, file, and link to handle those types of sequences,
 270 # and handles S<>, E<>, X<>, and Z<> directly.
 271 sub interior_sequence {
 272     local $_;
 273     my ($self, $command, $seq);
 274     ($self, $command, $_, $seq) = @_;
 275
 276     # We have to defer processing of the inside of an L<> formatting code.  If
 277     # this sequence is nested inside an L<> sequence, return the literal raw
 278     # text of it.
 279     my $parent = $seq->nested;
 280     while (defined $parent) {
 281         return $seq->raw_text if ($parent->cmd_name eq 'L');
 282         $parent = $parent->nested;
 283     }
 284
 285     # Index entries are ignored in plain text.
 286     return '' if ($command eq 'X' || $command eq 'Z');
 287
 288     # Expand escapes into the actual character now, warning if invalid.
 289     if ($command eq 'E') {
 290         if (/^\d+$/) {
 291             return chr;
 292         } else {
 293             return $ESCAPES{$_} if defined $ESCAPES{$_};
 294             my $seq = shift;
 295             my ($file, $line) = $seq->file_line;
 296             warn "$file:$line: Unknown escape: E<$_>\n";
 297             return "E<$_>";
 298         }
 299     }
 300
 301     # For all the other sequences, empty content produces no output.
 302     return if $_ eq '';
 303
 304     # For S<>, compress all internal whitespace and then map spaces to \01.
 305     # When we output the text, we'll map this back.
 306     if ($command eq 'S') {
 307         s/\s+/ /g;
 308         tr/ /\01/;
 309         return $_;
 310     }
 311
 312     # Anything else needs to get dispatched to another method.
 313     if    ($command eq 'B') { return $self->seq_b ($_) }
 314     elsif ($command eq 'C') { return $self->seq_c ($_) }
 315     elsif ($command eq 'F') { return $self->seq_f ($_) }
 316     elsif ($command eq 'I') { return $self->seq_i ($_) }
 317     elsif ($command eq 'L') { return $self->seq_l ($_, $seq) }
 318     else {
 319         my $seq = shift;
 320         my ($file, $line) = $seq->file_line;
 321         warn "$file:$line: Unknown sequence $command<$_>\n";
 322     }
 323 }
 324
 325 # Called for each paragraph that's actually part of the POD.  We take
 326 # advantage of this opportunity to untabify the input.  Also, if given the
 327 # code option, we may see paragraphs that aren't part of the POD and need to
 328 # output them directly.
 329 sub preprocess_paragraph {
 330     my $self = shift;
 331     local $_ = shift;
 332     1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
 333     $self->output_code ($_) if $self->cutting;
 334     $_;
 335 }
 336
 337
 338 ##############################################################################
 339 # Command paragraphs
 340 ##############################################################################
 341
 342 # All command paragraphs take the paragraph and the line number.
 343
 344 # First level heading.
 345 sub cmd_head1 {
 346     my $self = shift;
 347     local $_ = shift;
 348     s/\s+$//;
 349     $_ = $self->interpolate ($_, shift);
 350     if ($$self{alt}) {
 351         $self->output ("\n==== $_ ====\n\n");
 352     } else {
 353         $_ .= "\n" if $$self{loose};
 354         $self->output ($_ . "\n");
 355     }
 356 }
 357
 358 # Second level heading.
 359 sub cmd_head2 {
 360     my $self = shift;
 361     local $_ = shift;
 362     s/\s+$//;
 363     $_ = $self->interpolate ($_, shift);
 364     if ($$self{alt}) {
 365         $self->output ("\n==   $_   ==\n\n");
 366     } else {
 367         $self->output (' ' x ($$self{indent} / 2) . $_ . "\n\n");
 368     }
 369 }
 370
 371 # Third level heading.
 372 sub cmd_head3 {
 373     my $self = shift;
 374     local $_ = shift;
 375     s/\s+$//;
 376     $_ = $self->interpolate ($_, shift);
 377     if ($$self{alt}) {
 378         $self->output ("\n=    $_    =\n\n");
 379     } else {
 380         $self->output (' ' x ($$self{indent} * 2 / 3 + 0.5) . $_ . "\n\n");
 381     }
 382 }
 383
 384 # Third level heading.
 385 sub cmd_head4 {
 386     my $self = shift;
 387     local $_ = shift;
 388     s/\s+$//;
 389     $_ = $self->interpolate ($_, shift);
 390     if ($$self{alt}) {
 391         $self->output ("\n-    $_    -\n\n");
 392     } else {
 393         $self->output (' ' x ($$self{indent} * 3 / 4 + 0.5) . $_ . "\n\n");
 394     }
 395 }
 396
 397 # Start a list.
 398 sub cmd_over {
 399     my $self = shift;
 400     local $_ = shift;
 401     unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
 402     push (@{ $$self{INDENTS} }, $$self{MARGIN});
 403     $$self{MARGIN} += ($_ + 0);
 404 }
 405
 406 # End a list.
 407 sub cmd_back {
 408     my ($self, $text, $line, $paragraph) = @_;
 409     $$self{MARGIN} = pop @{ $$self{INDENTS} };
 410     unless (defined $$self{MARGIN}) {
 411         my $file;
 412         ($file, $line) = $paragraph->file_line;
 413         warn "$file:$line: Unmatched =back\n";
 414         $$self{MARGIN} = $$self{indent};
 415     }
 416 }
 417
 418 # An individual list item.
 419 sub cmd_item {
 420     my $self = shift;
 421     if (defined $$self{ITEM}) { $self->item }
 422     local $_ = shift;
 423     s/\s+$//;
 424     $$self{ITEM} = $self->interpolate ($_);
 425 }
 426
 427 # Begin a block for a particular translator.  Setting VERBATIM triggers
 428 # special handling in textblock().
 429 sub cmd_begin {
 430     my $self = shift;
 431     local $_ = shift;
 432     my ($kind) = /^(\S+)/ or return;
 433     if ($kind eq 'text') {
 434         $$self{VERBATIM} = 1;
 435     } else {
 436         $$self{EXCLUDE} = 1;
 437     }
 438 }
 439
 440 # End a block for a particular translator.  We assume that all =begin/=end
 441 # pairs are properly closed.
 442 sub cmd_end {
 443     my $self = shift;
 444     $$self{EXCLUDE} = 0;
 445     $$self{VERBATIM} = 0;
 446 }
 447
 448 # One paragraph for a particular translator.  Ignore it unless it's intended
 449 # for text, in which case we treat it as a verbatim text block.
 450 sub cmd_for {
 451     my $self = shift;
 452     local $_ = shift;
 453     my $line = shift;
 454     return unless s/^text\b[ \t]*\n?//;
 455     $self->verbatim ($_, $line);
 456 }
 457
 458
 459 ##############################################################################
 460 # Interior sequences
 461 ##############################################################################
 462
 463 # The simple formatting ones.  These are here mostly so that subclasses can
 464 # override them and do more complicated things.
 465 sub seq_b { return $_[0]{alt} ? "``$_[1]''" : $_[1] }
 466 sub seq_f { return $_[0]{alt} ? "\"$_[1]\"" : $_[1] }
 467 sub seq_i { return '*' . $_[1] . '*' }
 468
 469 # Apply a whole bunch of messy heuristics to not quote things that don't
 470 # benefit from being quoted.  These originally come from Barrie Slaymaker and
 471 # largely duplicate code in Pod::Man.
 472 sub seq_c {
 473     my $self = shift;
 474     local $_ = shift;
 475
 476     # A regex that matches the portion of a variable reference that's the
 477     # array or hash index, separated out just because we want to use it in
 478     # several places in the following regex.
 479     my $index = '(?: \[.*\] | \{.*\} )?';
 480
 481     # Check for things that we don't want to quote, and if we find any of
 482     # them, return the string with just a font change and no quoting.
 483     m{
 484       ^\s*
 485       (?:
 486          ( [\'\`\"] ) .* \1                             # already quoted
 487        | \` .* \'                                       # `quoted'
 488        | \$+ [\#^]? \S $index                           # special ($^Foo, $")
 489        | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
 490        | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
 491        | [+-]? [\d.]+ (?: [eE] [+-]? \d+ )?             # a number
 492        | 0x [a-fA-F\d]+                                 # a hex constant
 493       )
 494       \s*\z
 495      }xo && return $_;
 496
 497     # If we didn't return, go ahead and quote the text.
 498     return $$self{alt} ? "``$_''" : "$$self{LQUOTE}$_$$self{RQUOTE}";
 499 }
 500
 501 # Handle links.  Since this is plain text, we can't actually make any real
 502 # links, so this is all to figure out what text we print out.  Most of the
 503 # work is done by Pod::ParseLink.
 504 sub seq_l {
 505     my ($self, $link, $seq) = @_;
 506     my ($text, $type) = (parselink ($link))[1,4];
 507     my ($file, $line) = $seq->file_line;
 508     $text = $self->interpolate ($text, $line);
 509     $text = '<' . $text . '>' if $type eq 'url';
 510     return $text || '';
 511 }
 512
 513
 514 ##############################################################################
 515 # List handling
 516 ##############################################################################
 517
 518 # This method is called whenever an =item command is complete (in other words,
 519 # we've seen its associated paragraph or know for certain that it doesn't have
 520 # one).  It gets the paragraph associated with the item as an argument.  If
 521 # that argument is empty, just output the item tag; if it contains a newline,
 522 # output the item tag followed by the newline.  Otherwise, see if there's
 523 # enough room for us to output the item tag in the margin of the text or if we
 524 # have to put it on a separate line.
 525 sub item {
 526     my $self = shift;
 527     local $_ = shift;
 528     my $tag = $$self{ITEM};
 529     unless (defined $tag) {
 530         carp "Item called without tag";
 531         return;
 532     }
 533     undef $$self{ITEM};
 534     my $indent = $$self{INDENTS}[-1];
 535     unless (defined $indent) { $indent = $$self{indent} }
 536     my $space = ' ' x $indent;
 537     $space =~ s/^ /:/ if $$self{alt};
 538     if (!$_ || /^\s+$/ || ($$self{MARGIN} - $indent < length ($tag) + 1)) {
 539         my $margin = $$self{MARGIN};
 540         $$self{MARGIN} = $indent;
 541         my $output = $self->reformat ($tag);
 542         $output =~ s/\n*$/\n/;
 543         $self->output ($output);
 544         $$self{MARGIN} = $margin;
 545         $self->output ($self->reformat ($_)) if /\S/;
 546     } else {
 547         $_ = $self->reformat ($_);
 548         s/^ /:/ if ($$self{alt} && $indent > 0);
 549         my $tagspace = ' ' x length $tag;
 550         s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
 551         $self->output ($_);
 552     }
 553 }
 554
 555
 556 ##############################################################################
 557 # Output formatting
 558 ##############################################################################
 559
 560 # Wrap a line, indenting by the current left margin.  We can't use Text::Wrap
 561 # because it plays games with tabs.  We can't use formline, even though we'd
 562 # really like to, because it screws up non-printing characters.  So we have to
 563 # do the wrapping ourselves.
 564 sub wrap {
 565     my $self = shift;
 566     local $_ = shift;
 567     my $output = '';
 568     my $spaces = ' ' x $$self{MARGIN};
 569     my $width = $$self{width} - $$self{MARGIN};
 570     while (length > $width) {
 571         if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
 572             $output .= $spaces . $1 . "\n";
 573         } else {
 574             last;
 575         }
 576     }
 577     $output .= $spaces . $_;
 578     $output =~ s/\s+$/\n\n/;
 579     $output;
 580 }
 581
 582 # Reformat a paragraph of text for the current margin.  Takes the text to
 583 # reformat and returns the formatted text.
 584 sub reformat {
 585     my $self = shift;
 586     local $_ = shift;
 587
 588     # If we're trying to preserve two spaces after sentences, do some munging
 589     # to support that.  Otherwise, smash all repeated whitespace.
 590     if ($$self{sentence}) {
 591         s/ +$//mg;
 592         s/\.\n/. \n/g;
 593         s/\n/ /g;
 594         s/   +/  /g;
 595     } else {
 596         s/\s+/ /g;
 597     }
 598     $self->wrap ($_);
 599 }
 600
 601 # Output text to the output device.
 602 sub output { $_[1] =~ tr/\01/ /; print { $_[0]->output_handle } $_[1] }
 603
 604 # Output a block of code (something that isn't part of the POD text).  Called
 605 # by preprocess_paragraph only if we were given the code option.  Exists here
 606 # only so that it can be overridden by subclasses.
 607 sub output_code { $_[0]->output ($_[1]) }
 608
 609
 610 ##############################################################################
 611 # Backwards compatibility
 612 ##############################################################################
 613
 614 # The old Pod::Text module did everything in a pod2text() function.  This
 615 # tries to provide the same interface for legacy applications.
 616 sub pod2text {
 617     my @args;
 618
 619     # This is really ugly; I hate doing option parsing in the middle of a
 620     # module.  But the old Pod::Text module supported passing flags to its
 621     # entry function, so handle -a and -<number>.
 622     while ($_[0] =~ /^-/) {
 623         my $flag = shift;
 624         if    ($flag eq '-a')       { push (@args, alt => 1)    }
 625         elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
 626         else {
 627             unshift (@_, $flag);
 628             last;
 629         }
 630     }
 631
 632     # Now that we know what arguments we're using, create the parser.
 633     my $parser = Pod::Text->new (@args);
 634
 635     # If two arguments were given, the second argument is going to be a file
 636     # handle.  That means we want to call parse_from_filehandle(), which means
 637     # we need to turn the first argument into a file handle.  Magic open will
 638     # handle the <&STDIN case automagically.
 639     if (defined $_[1]) {
 640         my @fhs = @_;
 641         local *IN;
 642         unless (open (IN, $fhs[0])) {
 643             croak ("Can't open $fhs[0] for reading: $!\n");
 644             return;
 645         }
 646         $fhs[0] = \*IN;
 647         return $parser->parse_from_filehandle (@fhs);
 648     } else {
 649         return $parser->parse_from_file (@_);
 650     }
 651 }
 652
 653
 654 ##############################################################################
 655 # Module return value and documentation
 656 ##############################################################################
 657
 658 1;
 659 __END__
 660
 661 =head1 NAME
 662
 663 Pod::Text - Convert POD data to formatted ASCII text
 664
 665 =head1 SYNOPSIS
 666
 667     use Pod::Text;
 668     my $parser = Pod::Text->new (sentence => 0, width => 78);
 669
 670     # Read POD from STDIN and write to STDOUT.
 671     $parser->parse_from_filehandle;
 672
 673     # Read POD from file.pod and write to file.txt.
 674     $parser->parse_from_file ('file.pod', 'file.txt');
 675
 676 =head1 DESCRIPTION
 677
 678 Pod::Text is a module that can convert documentation in the POD format (the
 679 preferred language for documenting Perl) into formatted ASCII.  It uses no
 680 special formatting controls or codes whatsoever, and its output is therefore
 681 suitable for nearly any device.
 682
 683 As a derived class from Pod::Parser, Pod::Text supports the same methods and
 684 interfaces.  See L<Pod::Parser> for all the details; briefly, one creates a
 685 new parser with C<< Pod::Text->new() >> and then calls either
 686 parse_from_filehandle() or parse_from_file().
 687
 688 new() can take options, in the form of key/value pairs, that control the
 689 behavior of the parser.  The currently recognized options are:
 690
 691 =over 4
 692
 693 =item alt
 694
 695 If set to a true value, selects an alternate output format that, among other
 696 things, uses a different heading style and marks C<=item> entries with a
 697 colon in the left margin.  Defaults to false.
 698
 699 =item code
 700
 701 If set to a true value, the non-POD parts of the input file will be included
 702 in the output.  Useful for viewing code documented with POD blocks with the
 703 POD rendered and the code left intact.
 704
 705 =item indent
 706
 707 The number of spaces to indent regular text, and the default indentation for
 708 C<=over> blocks.  Defaults to 4.
 709
 710 =item loose
 711
 712 If set to a true value, a blank line is printed after a C<=head1> heading.
 713 If set to false (the default), no blank line is printed after C<=head1>,
 714 although one is still printed after C<=head2>.  This is the default because
 715 it's the expected formatting for manual pages; if you're formatting
 716 arbitrary text documents, setting this to true may result in more pleasing
 717 output.
 718
 719 =item quotes
 720
 721 Sets the quote marks used to surround CE<lt>> text.  If the value is a
 722 single character, it is used as both the left and right quote; if it is two
 723 characters, the first character is used as the left quote and the second as
the right quote; and if it is four characters, the first two are used as
 725 the left quote and the second two as the right quote.
 726
 727 This may also be set to the special value C<none>, in which case no quote
 728 marks are added around CE<lt>> text.
 729
 730 =item sentence
 731
 732 If set to a true value, Pod::Text will assume that each sentence ends in two
 733 spaces, and will try to preserve that spacing.  If set to false, all
 734 consecutive whitespace in non-verbatim paragraphs is compressed into a
single space.  Defaults to false.
 736
 737 =item width
 738
 739 The column at which to wrap text on the right-hand side.  Defaults to 76.
 740
 741 =back
 742
 743 The standard Pod::Parser method parse_from_filehandle() takes up to two
 744 arguments, the first being the file handle to read POD from and the second
 745 being the file handle to write the formatted output to.  The first defaults
 746 to STDIN if not given, and the second defaults to STDOUT.  The method
 747 parse_from_file() is almost identical, except that its two arguments are the
 748 input and output disk files instead.  See L<Pod::Parser> for the specific
 749 details.
 750
 751 =head1 DIAGNOSTICS
 752
 753 =over 4
 754
 755 =item Bizarre space in item
 756
 757 =item Item called without tag
 758
 759 (W) Something has gone wrong in internal C<=item> processing.  These
 760 messages indicate a bug in Pod::Text; you should never see them.
 761
 762 =item Can't open %s for reading: %s
 763
 764 (F) Pod::Text was invoked via the compatibility mode pod2text() interface
 765 and the input file it was given could not be opened.
 766
 767 =item Invalid quote specification "%s"
 768
 769 (F) The quote specification given (the quotes option to the constructor) was
 770 invalid.  A quote specification must be one, two, or four characters long.
 771
=item %s:%d: Unknown command paragraph "%s"
 773
 774 (W) The POD source contained a non-standard command paragraph (something of
the form C<=command args>) that Pod::Text didn't know about.  It was ignored.
 776
 777 =item %s:%d: Unknown escape: %s
 778
 779 (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
 780 know about.
 781
=item %s:%d: Unknown sequence %s
 783
 784 (W) The POD source contained a non-standard internal sequence (something of
 785 the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
 786
 787 =item %s:%d: Unmatched =back
 788
 789 (W) Pod::Text encountered a C<=back> command that didn't correspond to an
 790 C<=over> command.
 791
 792 =back
 793
 794 =head1 RESTRICTIONS
 795
 796 Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
 797 output, due to an internal implementation detail.
 798
 799 =head1 NOTES
 800
 801 This is a replacement for an earlier Pod::Text module written by Tom
 802 Christiansen.  It has a revamped interface, since it now uses Pod::Parser,
 803 but an interface roughly compatible with the old Pod::Text::pod2text()
 804 function is still available.  Please change to the new calling convention,
 805 though.
 806
 807 The original Pod::Text contained code to do formatting via termcap
 808 sequences, although it wasn't turned on by default and it was problematic to
 809 get it to work at all.  This rewrite doesn't even try to do that, but a
 810 subclass of it does.  Look for L<Pod::Text::Termcap>.
 811
 812 =head1 SEE ALSO
 813
 814 L<Pod::Parser>, L<Pod::Text::Termcap>, L<pod2text(1)>
 815
 816 =head1 AUTHOR
 817
 818 Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
 819 Pod::Text by Tom Christiansen <tchrist@mox.perl.com> and its conversion to
 820 Pod::Parser by Brad Appleton <bradapp@enteract.com>.
 821
 822 =head1 COPYRIGHT AND LICENSE
 823
 824 Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>.
 825
 826 This program is free software; you may redistribute it and/or modify it
 827 under the same terms as Perl itself.
 828
 829 =cut