Upgrade to podlators 1.14.
[p5sagit/p5-mst-13.2.git] / lib / Pod / Text.pm
CommitLineData
6055f9d4 1# Pod::Text -- Convert POD data to formatted ASCII text.
b616daaf 2# $Id: Text.pm,v 2.15 2001/11/23 06:14:10 eagle Exp $
6055f9d4 3#
16ba52cf 4# Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>
6055f9d4 5#
3c014959 6# This program is free software; you may redistribute it and/or modify it
6055f9d4 7# under the same terms as Perl itself.
8#
3c014959 9# This module replaces the old Pod::Text that came with versions of Perl prior
10# to 5.6.0, and attempts to match its output except for some specific
11# circumstances where other decisions seemed to produce better output. It
12# uses Pod::Parser and is designed to be very easy to subclass.
13#
14# Perl core hackers, please note that this module is also separately
15# maintained outside of the Perl core as part of the podlators. Please send
16# me any patches at the address above in addition to sending them to the
17# standard Perl mailing lists.
6055f9d4 18
3c014959 19##############################################################################
6055f9d4 20# Modules and declarations
3c014959 21##############################################################################
69e00e79 22
6055f9d4 23package Pod::Text;
69e00e79 24
6055f9d4 25require 5.004;
26
27f805f4 27use Carp qw(carp croak);
2e20e14f 28use Exporter ();
bf202ccd 29use Pod::ParseLink qw(parselink);
27f805f4 30use Pod::Select ();
6055f9d4 31
32use strict;
2e20e14f 33use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
6055f9d4 34
3c014959 35# We inherit from Pod::Select instead of Pod::Parser so that we can be used by
36# Pod::Usage.
2e20e14f 37@ISA = qw(Pod::Select Exporter);
6055f9d4 38
2e20e14f 39# We have to export pod2text for backward compatibility.
40@EXPORT = qw(pod2text);
41
3c014959 42# Don't use the CVS revision as the version, since this module is also in Perl
43# core and too many things could munge CVS magic revision strings. This
44# number should ideally be the same as the CVS revision in podlators, however.
b616daaf 45$VERSION = 2.15;
6055f9d4 46
47
3c014959 48##############################################################################
6055f9d4 49# Table of supported E<> escapes
3c014959 50##############################################################################
6055f9d4 51
3c014959 52# This table is taken near verbatim from Pod::PlainText in Pod::Parser, which
53# got it near verbatim from the original Pod::Text. It is therefore credited
54# to Tom Christiansen, and I'm glad I didn't have to write it. :) "iexcl" to
55# "divide" added by Tim Jenness.
6055f9d4 56%ESCAPES = (
57 'amp' => '&', # ampersand
bf202ccd 58 'apos' => "'", # apostrophe
6055f9d4 59 'lt' => '<', # left chevron, less-than
60 'gt' => '>', # right chevron, greater-than
61 'quot' => '"', # double quote
ab1f1d91 62 'sol' => '/', # solidus (forward slash)
be3174d2 63 'verbar' => '|', # vertical bar
5cdeb5a2 64
6055f9d4 65 "Aacute" => "\xC1", # capital A, acute accent
66 "aacute" => "\xE1", # small a, acute accent
67 "Acirc" => "\xC2", # capital A, circumflex accent
68 "acirc" => "\xE2", # small a, circumflex accent
69 "AElig" => "\xC6", # capital AE diphthong (ligature)
70 "aelig" => "\xE6", # small ae diphthong (ligature)
71 "Agrave" => "\xC0", # capital A, grave accent
72 "agrave" => "\xE0", # small a, grave accent
73 "Aring" => "\xC5", # capital A, ring
74 "aring" => "\xE5", # small a, ring
75 "Atilde" => "\xC3", # capital A, tilde
76 "atilde" => "\xE3", # small a, tilde
77 "Auml" => "\xC4", # capital A, dieresis or umlaut mark
78 "auml" => "\xE4", # small a, dieresis or umlaut mark
79 "Ccedil" => "\xC7", # capital C, cedilla
80 "ccedil" => "\xE7", # small c, cedilla
81 "Eacute" => "\xC9", # capital E, acute accent
82 "eacute" => "\xE9", # small e, acute accent
83 "Ecirc" => "\xCA", # capital E, circumflex accent
84 "ecirc" => "\xEA", # small e, circumflex accent
85 "Egrave" => "\xC8", # capital E, grave accent
86 "egrave" => "\xE8", # small e, grave accent
87 "ETH" => "\xD0", # capital Eth, Icelandic
88 "eth" => "\xF0", # small eth, Icelandic
89 "Euml" => "\xCB", # capital E, dieresis or umlaut mark
90 "euml" => "\xEB", # small e, dieresis or umlaut mark
ee89c1da 91 "Iacute" => "\xCD", # capital I, acute accent
92 "iacute" => "\xED", # small i, acute accent
6055f9d4 93 "Icirc" => "\xCE", # capital I, circumflex accent
94 "icirc" => "\xEE", # small i, circumflex accent
ee89c1da 95 "Igrave" => "\xCC", # capital I, grave accent
96 "igrave" => "\xEC", # small i, grave accent
6055f9d4 97 "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
98 "iuml" => "\xEF", # small i, dieresis or umlaut mark
99 "Ntilde" => "\xD1", # capital N, tilde
100 "ntilde" => "\xF1", # small n, tilde
101 "Oacute" => "\xD3", # capital O, acute accent
102 "oacute" => "\xF3", # small o, acute accent
103 "Ocirc" => "\xD4", # capital O, circumflex accent
104 "ocirc" => "\xF4", # small o, circumflex accent
105 "Ograve" => "\xD2", # capital O, grave accent
106 "ograve" => "\xF2", # small o, grave accent
107 "Oslash" => "\xD8", # capital O, slash
108 "oslash" => "\xF8", # small o, slash
109 "Otilde" => "\xD5", # capital O, tilde
110 "otilde" => "\xF5", # small o, tilde
111 "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
112 "ouml" => "\xF6", # small o, dieresis or umlaut mark
113 "szlig" => "\xDF", # small sharp s, German (sz ligature)
114 "THORN" => "\xDE", # capital THORN, Icelandic
115 "thorn" => "\xFE", # small thorn, Icelandic
116 "Uacute" => "\xDA", # capital U, acute accent
117 "uacute" => "\xFA", # small u, acute accent
118 "Ucirc" => "\xDB", # capital U, circumflex accent
119 "ucirc" => "\xFB", # small u, circumflex accent
120 "Ugrave" => "\xD9", # capital U, grave accent
121 "ugrave" => "\xF9", # small u, grave accent
122 "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
123 "uuml" => "\xFC", # small u, dieresis or umlaut mark
124 "Yacute" => "\xDD", # capital Y, acute accent
125 "yacute" => "\xFD", # small y, acute accent
126 "yuml" => "\xFF", # small y, dieresis or umlaut mark
5cdeb5a2 127
a3e04946 128 "laquo" => "\xAB", # left pointing double angle quotation mark
129 "lchevron" => "\xAB", # synonym (backwards compatibility)
130 "raquo" => "\xBB", # right pointing double angle quotation mark
131 "rchevron" => "\xBB", # synonym (backwards compatibility)
132
133 "iexcl" => "\xA1", # inverted exclamation mark
134 "cent" => "\xA2", # cent sign
135 "pound" => "\xA3", # (UK) pound sign
136 "curren" => "\xA4", # currency sign
137 "yen" => "\xA5", # yen sign
138 "brvbar" => "\xA6", # broken vertical bar
139 "sect" => "\xA7", # section sign
140 "uml" => "\xA8", # diaresis
141 "copy" => "\xA9", # Copyright symbol
142 "ordf" => "\xAA", # feminine ordinal indicator
143 "not" => "\xAC", # not sign
bf202ccd 144 "shy" => '', # soft (discretionary) hyphen
a3e04946 145 "reg" => "\xAE", # registered trademark
146 "macr" => "\xAF", # macron, overline
147 "deg" => "\xB0", # degree sign
148 "plusmn" => "\xB1", # plus-minus sign
149 "sup2" => "\xB2", # superscript 2
150 "sup3" => "\xB3", # superscript 3
151 "acute" => "\xB4", # acute accent
152 "micro" => "\xB5", # micro sign
153 "para" => "\xB6", # pilcrow sign = paragraph sign
154 "middot" => "\xB7", # middle dot = Georgian comma
155 "cedil" => "\xB8", # cedilla
156 "sup1" => "\xB9", # superscript 1
157 "ordm" => "\xBA", # masculine ordinal indicator
158 "frac14" => "\xBC", # vulgar fraction one quarter
159 "frac12" => "\xBD", # vulgar fraction one half
160 "frac34" => "\xBE", # vulgar fraction three quarters
161 "iquest" => "\xBF", # inverted question mark
162 "times" => "\xD7", # multiplication sign
163 "divide" => "\xF7", # division sign
bf202ccd 164
165 "nbsp" => "\x01", # non-breaking space
6055f9d4 166);
69e00e79 167
69e00e79 168
3c014959 169##############################################################################
6055f9d4 170# Initialization
3c014959 171##############################################################################
69e00e79 172
# Set up a freshly constructed formatter. Fills in a default for every
# formatting option the caller left undefined, derives the left and right
# quote marks for C<> text from the quotes option, resets the margin state,
# and then calls the parent initializer, which must always run.
sub initialize {
    my $self = shift;

    # Defaults for the options the caller did not supply.
    my %default = (
        alt      => 0,
        indent   => 4,
        loose    => 0,
        sentence => 0,
        width    => 76,
    );
    foreach my $option (keys %default) {
        next if defined $$self{$option};
        $$self{$option} = $default{$option};
    }

    # Work out the quotes for C<> text: 'none' disables quoting, a single
    # character is used on both sides, and two or four characters are split
    # evenly into a left and a right mark. Anything else is a fatal error.
    $$self{quotes} ||= '"';
    my $quotes = $$self{quotes};
    my ($left, $right);
    if ($quotes eq 'none') {
        ($left, $right) = ('', '');
    } elsif (length ($quotes) == 1) {
        ($left, $right) = ($quotes, $quotes);
    } elsif ($quotes =~ /^(.)(.)$/ || $quotes =~ /^(..)(..)$/) {
        ($left, $right) = ($1, $2);
    } else {
        croak qq(Invalid quote specification "$$self{quotes}");
    }
    $$self{LQUOTE} = $left;
    $$self{RQUOTE} = $right;

    # INDENTS is the stack of saved margins; MARGIN is the current left
    # margin in spaces.
    $$self{INDENTS} = [];
    $$self{MARGIN}  = $$self{indent};

    $self->SUPER::initialize;

    # Ask the parser to hand us the non-POD text too if code was set.
    $self->parseopts ('-want_nonPODs' => 1) if $$self{code};
}
69e00e79 205
69e00e79 206
3c014959 207##############################################################################
6055f9d4 208# Core overrides
3c014959 209##############################################################################
6055f9d4 210
# Dispatcher for command paragraphs. Receives the command name, the
# paragraph text, the line number, and a paragraph object. =pod is ignored,
# and while EXCLUDE is set everything except =end is ignored. Otherwise the
# call is forwarded to the method named cmd_<command> if there is one; if
# not, a warning naming the file and line is issued.
sub command {
    my $self = shift;
    my $command = shift;

    return if $command eq 'pod';
    return if ($$self{EXCLUDE} && $command ne 'end');

    # Known command: hand the remaining arguments to its handler.
    my $method = 'cmd_' . $command;
    return $self->$method (@_) if $self->can ($method);

    # Unknown command: report it, using the paragraph object for the
    # location and tidying the text so the message reads like the source.
    my ($text, $line, $paragraph) = @_;
    my $file;
    ($file, $line) = $paragraph->file_line;
    $text =~ s/\n+\z//;
    $text = " $text" if ($text =~ /^\S/);
    warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
    return;
}
69e00e79 233
# Handler for verbatim paragraphs. Receives the paragraph text (plus a line
# number and paragraph object that are not used here). Flushes any pending
# =item, skips paragraphs that are entirely whitespace, and otherwise
# prints the text with the current margin prepended to each line that has
# content.
sub verbatim {
    my $self = shift;
    return if $$self{EXCLUDE};
    $self->item if defined $$self{ITEM};

    my $text = shift;
    return if $text =~ /^\s*$/;

    my $margin = ' ' x $$self{MARGIN};
    $text =~ s/^(\s*\S+)/$margin$1/gm;
    $self->output ($text);
}
69e00e79 246
# Handler for ordinary text paragraphs. Receives the paragraph text, the
# line number, and a paragraph object. Nothing is printed while EXCLUDE is
# set; while VERBATIM is set the text is printed untouched. Otherwise the
# formatting codes are interpolated and the result either completes a
# pending =item or is reformatted and printed as a paragraph.
sub textblock {
    my $self = shift;
    return if $$self{EXCLUDE};

    # Inside =begin text the paragraph goes out exactly as written.
    if ($$self{VERBATIM}) {
        $self->output ($_[0]);
        return;
    }

    my ($text, $line) = @_;
    $text = $self->interpolate ($text, $line);

    # Normalize the trailing whitespace to exactly one blank line.
    $text =~ s/\s+$/\n/;
    $text .= "\n";

    if (defined $$self{ITEM}) {
        $self->item ($text);
    } else {
        $self->output ($self->reformat ($text));
    }
}
69e00e79 265
# Handler for formatting codes (interior sequences). Receives the code
# letter, its already-expanded argument text, and the sequence object, and
# returns the text to substitute. B<>, C<>, F<>, I<>, and L<> are passed to
# seq_b, seq_c, seq_f, seq_i, and seq_l; E<>, S<>, X<>, and Z<> are handled
# here.
#
# Unknown escapes and unknown sequences produce a warning that names the
# file and line taken from the sequence object. (Earlier code fetched that
# object with a second "shift", which actually returned the formatter
# itself and made the warning path die.) An unknown escape is returned as
# literal E<...> text; an unknown sequence contributes no text.
sub interior_sequence {
    my ($self, $command, $text, $seq) = @_;

    # Processing of the inside of an L<> code has to be deferred. If this
    # sequence is nested anywhere inside an L<>, return its literal raw text
    # so that seq_l can interpolate it later.
    my $parent = $seq->nested;
    while (defined $parent) {
        return $seq->raw_text if ($parent->cmd_name eq 'L');
        $parent = $parent->nested;
    }

    # Index entries and zero-width codes produce nothing in plain text.
    return '' if ($command eq 'X' || $command eq 'Z');

    # Expand escapes into the actual character now, warning if invalid.
    if ($command eq 'E') {
        return chr ($text) if $text =~ /^\d+$/;
        return $ESCAPES{$text} if defined $ESCAPES{$text};
        my ($file, $line) = $seq->file_line;
        warn "$file:$line: Unknown escape: E<$text>\n";
        return "E<$text>";
    }

    # For all the other sequences, empty content produces no output.
    return if $text eq '';

    # For S<>, compress all internal whitespace and then map spaces to \01.
    # The output method maps \01 back to a space once wrapping is done.
    if ($command eq 'S') {
        $text =~ s/\s+/ /g;
        $text =~ tr/ /\01/;
        return $text;
    }

    # Anything else is dispatched to its own method.
    return $self->seq_b ($text)       if $command eq 'B';
    return $self->seq_c ($text)       if $command eq 'C';
    return $self->seq_f ($text)       if $command eq 'F';
    return $self->seq_i ($text)       if $command eq 'I';
    return $self->seq_l ($text, $seq) if $command eq 'L';

    # Unknown formatting code: warn and emit nothing, rather than letting
    # the return value of warn leak into the formatted text.
    my ($file, $line) = $seq->file_line;
    warn "$file:$line: Unknown sequence $command<$text>\n";
    return '';
}
f02a87df 323
# Hook run on every paragraph before it is parsed. Expands tabs to spaces,
# assuming tab stops every eight columns, and returns the result. If the
# parser reports that it is currently cutting (the paragraph is not POD),
# the expanded text is also passed to output_code.
sub preprocess_paragraph {
    my ($self, $text) = @_;

    # Replace the first run of tabs on each line until none are left; the
    # width of a run depends on the column at which it starts.
    while ($text =~ s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me) {
        # Nothing to do; the substitution is the loop.
    }

    $self->output_code ($text) if $self->cutting;
    return $text;
}
3ec07288 335
69e00e79 336
3c014959 337##############################################################################
6055f9d4 338# Command paragraphs
3c014959 339##############################################################################
f2506fb2 340
6055f9d4 341# All command paragraphs take the paragraph and the line number.
69e00e79 342
# =head1: a first level heading, printed flush left. Takes the heading
# text and the line number.
sub cmd_head1 {
    my $self = shift;
    my ($text, $line) = @_;
    return $self->heading ($text, $line, 0, '====');
}
69e00e79 348
# =head2: a second level heading, indented by half the regular indent.
# Takes the heading text and the line number.
sub cmd_head2 {
    my $self = shift;
    my ($text, $line) = @_;
    my $indent = $$self{indent} / 2;
    return $self->heading ($text, $line, $indent, '== ');
}
69e00e79 354
# =head3: a third level heading, indented by two thirds of the regular
# indent (the 0.5 is added so the value rounds when used as a repeat
# count). Takes the heading text and the line number.
sub cmd_head3 {
    my $self = shift;
    my ($text, $line) = @_;
    my $indent = $$self{indent} * 2 / 3 + 0.5;
    return $self->heading ($text, $line, $indent, '= ');
}
360
# =head4: a fourth level heading, indented by three quarters of the regular
# indent (the 0.5 is added so the value rounds when used as a repeat
# count). Takes the heading text and the line number.
sub cmd_head4 {
    my $self = shift;
    my ($text, $line) = @_;
    my $indent = $$self{indent} * 3 / 4 + 0.5;
    return $self->heading ($text, $line, $indent, '- ');
}
366
# =over: start a list. Takes the argument text of the command. Any pending
# =item is flushed first. If the argument is a (possibly signed) integer it
# is used as the extra indentation, otherwise the default indent is used.
# The current margin is saved on the INDENTS stack for the matching =back.
#
# Trailing whitespace after the number is optional, so a bare "8" without
# a newline is honoured instead of silently falling back to the default.
sub cmd_over {
    my ($self, $arg) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};

    my $indent = $$self{indent};
    if (defined $arg && $arg =~ /^([-+]?\d+)\s*$/) {
        $indent = $1;
    }

    push (@{ $$self{INDENTS} }, $$self{MARGIN});
    $$self{MARGIN} += ($indent + 0);
}
69e00e79 376
# =back: end a list. Takes the command text, the line number, and the
# paragraph object. Flushes any pending =item and restores the margin that
# the matching =over saved. If there is nothing to restore, warns about the
# unmatched =back and resets the margin to the default indent.
sub cmd_back {
    my ($self, $text, $line, $paragraph) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};

    my $saved = pop @{ $$self{INDENTS} };
    $$self{MARGIN} = $saved;
    return if defined $saved;

    my $file;
    ($file, $line) = $paragraph->file_line;
    warn "$file:$line: Unmatched =back\n";
    $$self{MARGIN} = $$self{indent};
}
389
# =item: an individual list item. Takes the tag text of the command. Any
# previous item that is still pending is flushed first; then the new tag is
# stored in ITEM until the following paragraph (or the next command)
# completes it. An empty tag becomes a "*" bullet.
#
# The emptiness test is on length rather than truth so that a tag of "0"
# is kept as "0" instead of being turned into a bullet.
sub cmd_item {
    my ($self, $tag) = @_;
    $self->item if defined $$self{ITEM};

    $tag = '' unless defined $tag;
    $tag =~ s/\s+$//;
    $$self{ITEM} = length ($tag) ? $self->interpolate ($tag) : '*';
}
69e00e79 398
# =begin: start a block meant for one particular formatter. Takes the
# argument text; its first word names the formatter. A "text" block sets
# VERBATIM, which makes textblock() print paragraphs untouched; a block for
# any other formatter sets EXCLUDE so its contents are skipped. With no
# formatter name nothing changes.
sub cmd_begin {
    my ($self, $arg) = @_;
    return unless $arg =~ /^(\S+)/;
    my $kind = $1;

    my $flag = ($kind eq 'text') ? 'VERBATIM' : 'EXCLUDE';
    $$self{$flag} = 1;
}
f2506fb2 411
# =end: finish a formatter-specific block by clearing both the EXCLUDE and
# VERBATIM flags. Blocks are assumed to be properly paired, so the argument
# is not checked against the matching =begin.
sub cmd_end {
    my ($self) = @_;
    $$self{EXCLUDE}  = 0;
    $$self{VERBATIM} = 0;
}
6055f9d4 419
# =for: a single paragraph meant for one particular formatter. Takes the
# argument text and the line number. Paragraphs for other formatters are
# ignored; for "text" the formatter name (and the rest of its line if that
# is only blanks) is stripped and the remainder is printed as a verbatim
# paragraph.
sub cmd_for {
    my ($self, $text, $line) = @_;
    return unless $text =~ s/^text\b[ \t]*\n?//;
    $self->verbatim ($text, $line);
}
f2506fb2 429
69e00e79 430
3c014959 431##############################################################################
6055f9d4 432# Interior sequences
3c014959 433##############################################################################
69e00e79 434
# The simple formatting codes. Each takes the formatter and the text of the
# code and returns the text to print; they are separate methods mostly so
# that subclasses can override them.

# B<>: unchanged, or wrapped in `` and '' when the alt format is selected.
sub seq_b {
    my ($self, $text) = @_;
    return $text unless $$self{alt};
    return "``$text''";
}
# F<>: unchanged, or wrapped in double quotes when the alt format is
# selected.
sub seq_f {
    my ($self, $text) = @_;
    return $text unless $$self{alt};
    return '"' . $text . '"';
}
# I<>: always surrounded by asterisks.
sub seq_i {
    my ($self, $text) = @_;
    return "*$text*";
}
3c014959 440
# C<>: quote code text, except where quoting would only add noise. Takes
# the text of the code and returns it either untouched (already quoted
# strings, variables, simple function calls, numbers) or surrounded by the
# configured quote marks, or by `` and '' in the alt format. The heuristics
# originally come from Barrie Slaymaker and largely duplicate code in
# Pod::Man.
sub seq_c {
    my ($self, $text) = @_;

    # Matches the optional array or hash index at the end of a variable
    # reference; kept separate because the pattern below uses it twice.
    my $index = '(?: \[.*\] | \{.*\} )?';

    # Text that matches any of these alternatives is returned as is.
    if ($text =~ m{
      ^\s*
      (?:
         ( [\'\`\"] ) .* \1                             # already quoted
       | \` .* \'                                       # `quoted'
       | \$+ [\#^]? \S $index                           # special ($^Foo, $")
       | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
       | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
       | [+-]? [\d.]+ (?: [eE] [+-]? \d+ )?             # a number
       | 0x [a-fA-F\d]+                                 # a hex constant
      )
      \s*\z
     }xo) {
        return $text;
    }

    # Everything else gets quoted.
    return "``$text''" if $$self{alt};
    return "$$self{LQUOTE}$text$$self{RQUOTE}";
}
69e00e79 472
# L<>: links. Plain text cannot contain real links, so all that is needed
# is the text to print. Takes the raw link and the sequence object.
# Pod::ParseLink works out the display text and the link type; the text is
# then interpolated, and URLs are wrapped in angle brackets.
sub seq_l {
    my ($self, $link, $seq) = @_;

    # Only the inferred text (element 1) and the type (element 4) of the
    # parsed link are of interest.
    my @parsed = parselink ($link);
    my ($text, $type) = @parsed[1, 4];

    my ($file, $line) = $seq->file_line;
    $text = $self->interpolate ($text, $line);
    if ($type eq 'url') {
        $text = '<' . $text . '>';
    }
    return $text ? $text : '';
}
484
6055f9d4 485
3c014959 486##############################################################################
b616daaf 487# Header handling
488##############################################################################
489
# Shared implementation of all the heading commands. Takes the raw heading
# text, the line number, the indentation to use in the normal format, and
# the marker to put around the heading in the alt format. Flushes any
# pending =item, interpolates the text, and prints the heading.
sub heading {
    my ($self, $text, $line, $indent, $marker) = @_;
    $self->item ("\n\n") if defined $$self{ITEM};

    $text =~ s/\s+$//;
    $text = $self->interpolate ($text, $line);

    unless ($$self{alt}) {
        # Normal format: indented heading, followed by a blank line only
        # when loose is set.
        $text .= "\n" if $$self{loose};
        $self->output (' ' x $indent . $text . "\n");
        return;
    }

    # Alt format: the marker on the left and its mirror image on the right.
    my $closemark = reverse (split (//, $marker));
    $self->output ("\n" . "$marker $text $closemark" . "\n\n");
}
506
507
508##############################################################################
6055f9d4 509# List handling
3c014959 510##############################################################################
511
# Finish a pending =item. Called once the item is complete, which means
# either its paragraph has been seen or it is known not to have one. Takes
# the paragraph text that belongs to the item, if any. The tag stored in
# ITEM is consumed. When there is real text and the tag fits into the space
# between the enclosing indentation and the current margin, the tag is
# written into the margin of the first line of the text; otherwise the tag
# goes on a line of its own, followed by the text if there is any.
sub item {
    my ($self, $text) = @_;

    my $tag = $$self{ITEM};
    if (!defined $tag) {
        carp "Item called without tag";
        return;
    }
    undef $$self{ITEM};

    # The tag is placed at the indentation of the enclosing block. In the
    # alt format that indentation starts with a colon.
    my $indent = $$self{INDENTS}[-1];
    $indent = $$self{indent} unless defined $indent;
    my $space = ' ' x $indent;
    $space =~ s/^ /:/ if $$self{alt};

    if ($text && $text !~ /^\s+$/
        && $$self{MARGIN} - $indent >= length ($tag) + 1) {
        # The tag fits: overwrite the blanks in the margin of the first
        # line of the formatted paragraph with it.
        my $para = $self->reformat ($text);
        $para =~ s/^ /:/ if ($$self{alt} && $indent > 0);
        my $tagspace = ' ' x length $tag;
        $para =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
        $self->output ($para);
    } else {
        # The tag gets its own line, formatted at the outer indentation.
        my $margin = $$self{MARGIN};
        $$self{MARGIN} = $indent;
        my $output = $self->reformat ($tag);
        $output =~ s/\n*$/\n/;

        # If the text is just whitespace, we have an empty item paragraph;
        # this can result from =over/=item/=back without any intermixed
        # paragraphs. Insert some whitespace to keep the =item from merging
        # into the next paragraph.
        $output .= "\n" if $text && $text =~ /^\s*$/;

        $self->output ($output);
        $$self{MARGIN} = $margin;
        $self->output ($self->reformat ($text)) if $text && $text =~ /\S/;
    }
}
555
69e00e79 556
3c014959 557##############################################################################
6055f9d4 558# Output formatting
3c014959 559##############################################################################
69e00e79 560
# Wrap text to the configured width, indenting every line by the current
# left margin, and return the result ending in a blank line. Text::Wrap is
# not usable because it plays games with tabs, and formline is not usable
# because it mishandles non-printing characters, so the wrapping is done by
# hand.
sub wrap {
    my ($self, $text) = @_;
    my $margin = ' ' x $$self{MARGIN};
    my $width  = $$self{width} - $$self{MARGIN};

    # Peel off one line at a time: break at the last whitespace that fits,
    # or, failing that, in the middle of an overlong word. Stop if neither
    # is possible.
    my @lines;
    while (length ($text) > $width) {
        last unless $text =~ s/^([^\n]{0,$width})\s+//
            || $text =~ s/^([^\n]{$width})//;
        push (@lines, $margin . $1 . "\n");
    }

    my $output = join ('', @lines, $margin . $text);
    $output =~ s/\s+$/\n\n/;
    return $output;
}
582
# Reformat a paragraph for the current margin. Takes the text to reformat
# and returns it normalized and wrapped.
#
# With the sentence option set, the two spaces that end a sentence are
# preserved: a period at the end of a line is padded to two spaces, and any
# longer run of spaces is cut down to two. Without it, every run of
# whitespace becomes a single space.
sub reformat {
    my ($self, $text) = @_;

    if ($$self{sentence}) {
        $text =~ s/ +$//mg;         # drop trailing blanks on each line
        $text =~ s/\.\n/.  \n/g;    # sentence end at end of line
        $text =~ s/\n/ /g;          # join the lines
        $text =~ s/   +/  /g;       # never more than two spaces in a row
    } else {
        $text =~ s/\s+/ /g;
    }
    return $self->wrap ($text);
}
601
# Print text to the output handle of the parser, mapping the \01
# placeholders that stand for non-breaking spaces back to real spaces.
# Takes the text to print and returns the result of print.
#
# The translation is done on a copy, so the variable the caller passed in
# is left untouched.
sub output {
    my ($self, $text) = @_;
    $text =~ tr/\01/ /;
    print { $self->output_handle } $text;
}
69e00e79 604
# Print a block of code, meaning input that is not part of the POD text.
# preprocess_paragraph calls this when the parser is cutting. It simply
# forwards to output and exists only so that subclasses can override it.
sub output_code {
    my $self = $_[0];
    return $self->output ($_[1]);
}
609
69e00e79 610
3c014959 611##############################################################################
27f805f4 612# Backwards compatibility
3c014959 613##############################################################################
27f805f4 614
# Backward compatible entry point. The old Pod::Text module did everything
# in a pod2text() function; this provides the same interface for legacy
# callers. Takes optional leading flags (-a for the alt format, -<number>
# for the width), then an input file and optionally an output file handle.
# Dies via croak if an input file that has to be opened here cannot be
# opened.
sub pod2text {
    my @args;

    # The old module accepted flags in its argument list, so peel off -a
    # and -<number>. Anything else that starts with a dash is left in place
    # as the input file. The loop stops cleanly when the arguments run out.
    while (@_ && defined $_[0] && $_[0] =~ /^-/) {
        my $flag = shift;
        if    ($flag eq '-a')       { push (@args, alt => 1)    }
        elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
        else {
            unshift (@_, $flag);
            last;
        }
    }

    # Now that we know what arguments we're using, create the parser.
    my $parser = Pod::Text->new (@args);

    # If two arguments were given, the second argument is going to be a file
    # handle. That means we want to call parse_from_filehandle(), which means
    # we need to turn the first argument into a file handle. The two-argument
    # (magic) open is deliberate here: it handles the <&STDIN case that old
    # callers rely on.
    if (defined $_[1]) {
        my @fhs = @_;
        local *IN;
        unless (open (IN, $fhs[0])) {
            croak ("Can't open $fhs[0] for reading: $!\n");
        }
        $fhs[0] = \*IN;
        return $parser->parse_from_filehandle (@fhs);
    } else {
        return $parser->parse_from_file (@_);
    }
}
653
654
3c014959 655##############################################################################
6055f9d4 656# Module return value and documentation
3c014959 657##############################################################################
69e00e79 658
6055f9d4 6591;
660__END__
69e00e79 661
6055f9d4 662=head1 NAME
69e00e79 663
6055f9d4 664Pod::Text - Convert POD data to formatted ASCII text
69e00e79 665
6055f9d4 666=head1 SYNOPSIS
69e00e79 667
6055f9d4 668 use Pod::Text;
669 my $parser = Pod::Text->new (sentence => 0, width => 78);
69e00e79 670
6055f9d4 671 # Read POD from STDIN and write to STDOUT.
672 $parser->parse_from_filehandle;
69e00e79 673
6055f9d4 674 # Read POD from file.pod and write to file.txt.
675 $parser->parse_from_file ('file.pod', 'file.txt');
69e00e79 676
6055f9d4 677=head1 DESCRIPTION
5491a304 678
27f805f4 679Pod::Text is a module that can convert documentation in the POD format (the
680preferred language for documenting Perl) into formatted ASCII. It uses no
681special formatting controls or codes whatsoever, and its output is therefore
682suitable for nearly any device.
69e00e79 683
27f805f4 684As a derived class from Pod::Parser, Pod::Text supports the same methods and
685interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
bf202ccd 686new parser with C<< Pod::Text->new() >> and then calls either
27f805f4 687parse_from_filehandle() or parse_from_file().
6055f9d4 688
27f805f4 689new() can take options, in the form of key/value pairs, that control the
6055f9d4 690behavior of the parser. The currently recognized options are:
691
692=over 4
693
694=item alt
695
696If set to a true value, selects an alternate output format that, among other
697things, uses a different heading style and marks C<=item> entries with a
698colon in the left margin. Defaults to false.
699
59548eca 700=item code
701
702If set to a true value, the non-POD parts of the input file will be included
703in the output. Useful for viewing code documented with POD blocks with the
704POD rendered and the code left intact.
705
6055f9d4 706=item indent
707
708The number of spaces to indent regular text, and the default indentation for
709C<=over> blocks. Defaults to 4.
710
711=item loose
712
713If set to a true value, a blank line is printed after a C<=head1> heading.
714If set to false (the default), no blank line is printed after C<=head1>,
715although one is still printed after C<=head2>. This is the default because
716it's the expected formatting for manual pages; if you're formatting
717arbitrary text documents, setting this to true may result in more pleasing
718output.
719
ab1f1d91 720=item quotes
721
722Sets the quote marks used to surround CE<lt>> text. If the value is a
723single character, it is used as both the left and right quote; if it is two
724characters, the first character is used as the left quote and the second as
the right quote; and if it is four characters, the first two are used as
726the left quote and the second two as the right quote.
727
728This may also be set to the special value C<none>, in which case no quote
729marks are added around CE<lt>> text.
730
6055f9d4 731=item sentence
732
27f805f4 733If set to a true value, Pod::Text will assume that each sentence ends in two
734spaces, and will try to preserve that spacing. If set to false, all
6055f9d4 735consecutive whitespace in non-verbatim paragraphs is compressed into a
single space. Defaults to false.
737
738=item width
739
740The column at which to wrap text on the right-hand side. Defaults to 76.
741
742=back
743
27f805f4 744The standard Pod::Parser method parse_from_filehandle() takes up to two
6055f9d4 745arguments, the first being the file handle to read POD from and the second
746being the file handle to write the formatted output to. The first defaults
747to STDIN if not given, and the second defaults to STDOUT. The method
27f805f4 748parse_from_file() is almost identical, except that its two arguments are the
749input and output disk files instead. See L<Pod::Parser> for the specific
750details.
6055f9d4 751
752=head1 DIAGNOSTICS
753
754=over 4
755
27f805f4 756=item Bizarre space in item
757
59548eca 758=item Item called without tag
759
760(W) Something has gone wrong in internal C<=item> processing. These
761messages indicate a bug in Pod::Text; you should never see them.
27f805f4 762
763=item Can't open %s for reading: %s
764
765(F) Pod::Text was invoked via the compatibility mode pod2text() interface
766and the input file it was given could not be opened.
767
ab1f1d91 768=item Invalid quote specification "%s"
769
770(F) The quote specification given (the quotes option to the constructor) was
771invalid. A quote specification must be one, two, or four characters long.
772
773=item %s:%d: Unknown command paragraph "%s".
774
775(W) The POD source contained a non-standard command paragraph (something of
the form C<=command args>) that Pod::Text didn't know about. It was ignored.
777
59548eca 778=item %s:%d: Unknown escape: %s
6055f9d4 779
27f805f4 780(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
781know about.
6055f9d4 782
=item %s:%d: Unknown sequence %s
6055f9d4 784
27f805f4 785(W) The POD source contained a non-standard internal sequence (something of
786the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
6055f9d4 787
59548eca 788=item %s:%d: Unmatched =back
6055f9d4 789
27f805f4 790(W) Pod::Text encountered a C<=back> command that didn't correspond to an
6055f9d4 791C<=over> command.
792
793=back
794
27f805f4 795=head1 RESTRICTIONS
796
797Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
798output, due to an internal implementation detail.
799
6055f9d4 800=head1 NOTES
801
27f805f4 802This is a replacement for an earlier Pod::Text module written by Tom
803Christiansen. It has a revamped interface, since it now uses Pod::Parser,
804but an interface roughly compatible with the old Pod::Text::pod2text()
805function is still available. Please change to the new calling convention,
806though.
6055f9d4 807
808The original Pod::Text contained code to do formatting via termcap
809sequences, although it wasn't turned on by default and it was problematic to
27f805f4 810get it to work at all. This rewrite doesn't even try to do that, but a
bf202ccd 811subclass of it does. Look for L<Pod::Text::Termcap>.
6055f9d4 812
813=head1 SEE ALSO
814
bf202ccd 815L<Pod::Parser>, L<Pod::Text::Termcap>, L<pod2text(1)>
6055f9d4 816
817=head1 AUTHOR
818
bf202ccd 819Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
820Pod::Text by Tom Christiansen <tchrist@mox.perl.com> and its conversion to
821Pod::Parser by Brad Appleton <bradapp@enteract.com>.
6055f9d4 822
3c014959 823=head1 COPYRIGHT AND LICENSE
824
825Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>.
826
827This program is free software; you may redistribute it and/or modify it
828under the same terms as Perl itself.
829
6055f9d4 830=cut