Don't expect ASCII ordering.
[p5sagit/p5-mst-13.2.git] / lib / Pod / Text.pm
CommitLineData
6055f9d4 1# Pod::Text -- Convert POD data to formatted ASCII text.
59548eca 2# $Id: Text.pm,v 2.13 2001/10/20 08:07:21 eagle Exp $
6055f9d4 3#
16ba52cf 4# Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>
6055f9d4 5#
3c014959 6# This program is free software; you may redistribute it and/or modify it
6055f9d4 7# under the same terms as Perl itself.
8#
3c014959 9# This module replaces the old Pod::Text that came with versions of Perl prior
10# to 5.6.0, and attempts to match its output except for some specific
11# circumstances where other decisions seemed to produce better output. It
12# uses Pod::Parser and is designed to be very easy to subclass.
13#
14# Perl core hackers, please note that this module is also separately
15# maintained outside of the Perl core as part of the podlators. Please send
16# me any patches at the address above in addition to sending them to the
17# standard Perl mailing lists.
6055f9d4 18
3c014959 19##############################################################################
6055f9d4 20# Modules and declarations
3c014959 21##############################################################################
69e00e79 22
6055f9d4 23package Pod::Text;
69e00e79 24
6055f9d4 25require 5.004;
26
27f805f4 27use Carp qw(carp croak);
2e20e14f 28use Exporter ();
27f805f4 29use Pod::Select ();
6055f9d4 30
31use strict;
2e20e14f 32use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
6055f9d4 33
3c014959 34# We inherit from Pod::Select instead of Pod::Parser so that we can be used by
35# Pod::Usage.
2e20e14f 36@ISA = qw(Pod::Select Exporter);
6055f9d4 37
2e20e14f 38# We have to export pod2text for backward compatibility.
39@EXPORT = qw(pod2text);
40
3c014959 41# Don't use the CVS revision as the version, since this module is also in Perl
42# core and too many things could munge CVS magic revision strings. This
43# number should ideally be the same as the CVS revision in podlators, however.
59548eca 44$VERSION = 2.13;
6055f9d4 45
46
3c014959 47##############################################################################
6055f9d4 48# Table of supported E<> escapes
3c014959 49##############################################################################
6055f9d4 50
# Lookup table from the name used inside an E<> escape to the replacement
# character. The table is taken near verbatim from Pod::PlainText in
# Pod::Parser, which got it near verbatim from the original Pod::Text, and is
# therefore credited to Tom Christiansen. The entries from "iexcl" through
# "divide" were added by Tim Jenness.
%ESCAPES = (
    'amp'      => '&',      # ampersand
    'lt'       => '<',      # left chevron, less-than
    'gt'       => '>',      # right chevron, greater-than
    'quot'     => '"',      # double quote
    'sol'      => '/',      # solidus (forward slash)
    'verbar'   => '|',      # vertical bar

    'Aacute'   => "\xC1",   # capital A, acute accent
    'aacute'   => "\xE1",   # small a, acute accent
    'Acirc'    => "\xC2",   # capital A, circumflex accent
    'acirc'    => "\xE2",   # small a, circumflex accent
    'AElig'    => "\xC6",   # capital AE diphthong (ligature)
    'aelig'    => "\xE6",   # small ae diphthong (ligature)
    'Agrave'   => "\xC0",   # capital A, grave accent
    'agrave'   => "\xE0",   # small a, grave accent
    'Aring'    => "\xC5",   # capital A, ring
    'aring'    => "\xE5",   # small a, ring
    'Atilde'   => "\xC3",   # capital A, tilde
    'atilde'   => "\xE3",   # small a, tilde
    'Auml'     => "\xC4",   # capital A, dieresis or umlaut mark
    'auml'     => "\xE4",   # small a, dieresis or umlaut mark
    'Ccedil'   => "\xC7",   # capital C, cedilla
    'ccedil'   => "\xE7",   # small c, cedilla
    'Eacute'   => "\xC9",   # capital E, acute accent
    'eacute'   => "\xE9",   # small e, acute accent
    'Ecirc'    => "\xCA",   # capital E, circumflex accent
    'ecirc'    => "\xEA",   # small e, circumflex accent
    'Egrave'   => "\xC8",   # capital E, grave accent
    'egrave'   => "\xE8",   # small e, grave accent
    'ETH'      => "\xD0",   # capital Eth, Icelandic
    'eth'      => "\xF0",   # small eth, Icelandic
    'Euml'     => "\xCB",   # capital E, dieresis or umlaut mark
    'euml'     => "\xEB",   # small e, dieresis or umlaut mark
    'Iacute'   => "\xCD",   # capital I, acute accent
    'iacute'   => "\xED",   # small i, acute accent
    'Icirc'    => "\xCE",   # capital I, circumflex accent
    'icirc'    => "\xEE",   # small i, circumflex accent
    'Igrave'   => "\xCC",   # capital I, grave accent
    'igrave'   => "\xEC",   # small i, grave accent
    'Iuml'     => "\xCF",   # capital I, dieresis or umlaut mark
    'iuml'     => "\xEF",   # small i, dieresis or umlaut mark
    'Ntilde'   => "\xD1",   # capital N, tilde
    'ntilde'   => "\xF1",   # small n, tilde
    'Oacute'   => "\xD3",   # capital O, acute accent
    'oacute'   => "\xF3",   # small o, acute accent
    'Ocirc'    => "\xD4",   # capital O, circumflex accent
    'ocirc'    => "\xF4",   # small o, circumflex accent
    'Ograve'   => "\xD2",   # capital O, grave accent
    'ograve'   => "\xF2",   # small o, grave accent
    'Oslash'   => "\xD8",   # capital O, slash
    'oslash'   => "\xF8",   # small o, slash
    'Otilde'   => "\xD5",   # capital O, tilde
    'otilde'   => "\xF5",   # small o, tilde
    'Ouml'     => "\xD6",   # capital O, dieresis or umlaut mark
    'ouml'     => "\xF6",   # small o, dieresis or umlaut mark
    'szlig'    => "\xDF",   # small sharp s, German (sz ligature)
    'THORN'    => "\xDE",   # capital THORN, Icelandic
    'thorn'    => "\xFE",   # small thorn, Icelandic
    'Uacute'   => "\xDA",   # capital U, acute accent
    'uacute'   => "\xFA",   # small u, acute accent
    'Ucirc'    => "\xDB",   # capital U, circumflex accent
    'ucirc'    => "\xFB",   # small u, circumflex accent
    'Ugrave'   => "\xD9",   # capital U, grave accent
    'ugrave'   => "\xF9",   # small u, grave accent
    'Uuml'     => "\xDC",   # capital U, dieresis or umlaut mark
    'uuml'     => "\xFC",   # small u, dieresis or umlaut mark
    'Yacute'   => "\xDD",   # capital Y, acute accent
    'yacute'   => "\xFD",   # small y, acute accent
    'yuml'     => "\xFF",   # small y, dieresis or umlaut mark

    'laquo'    => "\xAB",   # left pointing double angle quotation mark
    'lchevron' => "\xAB",   # synonym (backwards compatibility)
    'raquo'    => "\xBB",   # right pointing double angle quotation mark
    'rchevron' => "\xBB",   # synonym (backwards compatibility)

    'iexcl'    => "\xA1",   # inverted exclamation mark
    'cent'     => "\xA2",   # cent sign
    'pound'    => "\xA3",   # (UK) pound sign
    'curren'   => "\xA4",   # currency sign
    'yen'      => "\xA5",   # yen sign
    'brvbar'   => "\xA6",   # broken vertical bar
    'sect'     => "\xA7",   # section sign
    'uml'      => "\xA8",   # dieresis
    'copy'     => "\xA9",   # Copyright symbol
    'ordf'     => "\xAA",   # feminine ordinal indicator
    'not'      => "\xAC",   # not sign
    'shy'      => "\xAD",   # soft hyphen
    'reg'      => "\xAE",   # registered trademark
    'macr'     => "\xAF",   # macron, overline
    'deg'      => "\xB0",   # degree sign
    'plusmn'   => "\xB1",   # plus-minus sign
    'sup2'     => "\xB2",   # superscript 2
    'sup3'     => "\xB3",   # superscript 3
    'acute'    => "\xB4",   # acute accent
    'micro'    => "\xB5",   # micro sign
    'para'     => "\xB6",   # pilcrow sign = paragraph sign
    'middot'   => "\xB7",   # middle dot = Georgian comma
    'cedil'    => "\xB8",   # cedilla
    'sup1'     => "\xB9",   # superscript 1
    'ordm'     => "\xBA",   # masculine ordinal indicator
    'frac14'   => "\xBC",   # vulgar fraction one quarter
    'frac12'   => "\xBD",   # vulgar fraction one half
    'frac34'   => "\xBE",   # vulgar fraction three quarters
    'iquest'   => "\xBF",   # inverted question mark
    'times'    => "\xD7",   # multiplication sign
    'divide'   => "\xF7",   # division sign
);
69e00e79 163
69e00e79 164
3c014959 165##############################################################################
6055f9d4 166# Initialization
3c014959 167##############################################################################
69e00e79 168
# Set up a new formatter object: fill in defaults for any formatting option
# the caller left undefined, work out the quote marks used around C<> text,
# reset the list state, and then run the parent class initializer. Croaks if
# the quotes option is not "none" or one, two, or four characters long.
sub initialize {
    my $self = shift;

    # Defaults for the documented formatting options.
    my %default = (
        alt      => 0,
        indent   => 4,
        loose    => 0,
        sentence => 0,
        width    => 76,
    );
    for my $option (keys %default) {
        next if defined $self->{$option};
        $self->{$option} = $default{$option};
    }

    # Figure out what quotes we'll be using for C<> text. A two-character
    # or four-character value is split evenly into left and right quotes.
    $self->{quotes} ||= '"';
    my ($left, $right);
    if ($self->{quotes} eq 'none') {
        ($left, $right) = ('', '');
    } elsif (length ($self->{quotes}) == 1) {
        ($left, $right) = ($self->{quotes}, $self->{quotes});
    } elsif ($self->{quotes} =~ /^(.)(.)$/
             || $self->{quotes} =~ /^(..)(..)$/) {
        ($left, $right) = ($1, $2);
    } else {
        croak qq(Invalid quote specification "$$self{quotes}");
    }
    $self->{LQUOTE} = $left;
    $self->{RQUOTE} = $right;

    $self->{INDENTS} = [];                  # Stack of indentations.
    $self->{MARGIN}  = $self->{indent};     # Current left margin in spaces.

    $self->SUPER::initialize;

    # Tell Pod::Parser that we want the non-POD stuff too if code was set.
    $self->parseopts ('-want_nonPODs' => 1) if $self->{code};
}
69e00e79 201
69e00e79 202
3c014959 203##############################################################################
6055f9d4 204# Core overrides
3c014959 205##############################################################################
6055f9d4 206
# Handler for every command paragraph. Receives the command name, the
# paragraph text, the line number, and a Pod::Paragraph object. =pod is
# ignored, and while EXCLUDE is set everything except =end is ignored. Any
# pending =item is flushed first. The command is then dispatched to the
# method named cmd_<command>; if no such method exists, a warning naming the
# file and line is issued instead. =cut is handled internally by Pod::Parser.
sub command {
    my $self    = shift;
    my $command = shift;

    return if $command eq 'pod';
    return if ($command ne 'end' && $self->{EXCLUDE});
    $self->item ("\n") if defined $self->{ITEM};

    # The remaining arguments are passed through untouched to the handler.
    my $handler = 'cmd_' . $command;
    return $self->$handler (@_) if $self->can ($handler);

    # No handler: report the paragraph with its trailing newlines removed.
    my ($text, $line, $paragraph) = @_;
    my $file;
    ($file, $line) = $paragraph->file_line;
    $text =~ s/\n+\z//;
    $text = " $text" if ($text =~ /^\S/);
    warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
    return;
}
69e00e79 230
# Handler for a verbatim paragraph. Receives the paragraph, the line number,
# and a Pod::Paragraph object. Nothing is output while EXCLUDE is set or when
# the paragraph is entirely whitespace. Otherwise any pending =item is
# flushed and the paragraph is output with the current margin inserted in
# front of each line's leading whitespace and first word.
sub verbatim {
    my ($self, $paragraph) = @_;
    return if $self->{EXCLUDE};
    $self->item if defined $self->{ITEM};
    local $_ = $paragraph;
    return if /^\s*$/;
    my $margin = ' ' x $self->{MARGIN};
    s/^(\s*\S+)/$margin$1/gm;
    $self->output ($_);
}
69e00e79 243
# Handler for a regular text block. Receives the paragraph, the line number,
# and a Pod::Paragraph object. Nothing is output while EXCLUDE is set; while
# VERBATIM is set (inside =begin text) the paragraph is output untouched.
# Otherwise the paragraph is interpolated and either handed to item(), when
# an =item tag is pending, or reformatted and output.
sub textblock {
    my $self = shift;
    return if $$self{EXCLUDE};
    $self->output ($_[0]), return if $$self{VERBATIM};
    local $_ = shift;
    my $line = shift;

    # Perform a little magic to collapse multiple L<> references. This is
    # here mostly for backwards-compatibility. We'll just rewrite the whole
    # thing into actual text at this part, bypassing the whole internal
    # sequence parsing thing.
    s{
        (
          L<                    # A link to an entry in this document.
            /
            (
              [:\w]+            # The item has to be a simple word...
              (\(\))?           # ...or simple function.
            )
          >
          (
            ,?\s+(and\s+)?      # Allow lots of them, conjuncted.
            L<
              /
              (
                [:\w]+
                (\(\))?
              )
            >
          )+
        )
    } {
        local $_ = $1;
        s%L</([^>]+)>%$1%g;
        my @items = split /(?:,?\s+(?:and\s+)?)/;

        # Two entries read "a and b"; three or more read "a, b, and c". The
        # conjunction is joined with exactly one space so that no doubled
        # space is left behind when sentence spacing is being preserved.
        my $final = pop @items;
        my $string = 'the ';
        if (@items > 1) {
            $string .= join (', ', @items) . ', and ' . $final;
        } else {
            $string .= join ('', @items) . ' and ' . $final;
        }
        $string .= " entries elsewhere in this document";
        $string;
    }gex;

    # Now actually interpolate and output the paragraph.
    $_ = $self->interpolate ($_, $line);
    s/\s+$/\n/;
    if (defined $$self{ITEM}) {
        $self->item ($_ . "\n");
    } else {
        $self->output ($self->reformat ($_ . "\n"));
    }
}
69e00e79 301
# Handler for an interior sequence. Receives the command letter, its
# argument, and a Pod::InteriorSequence object, and returns the text that
# replaces the sequence. X<> and Z<> produce nothing, E<> is expanded from a
# decimal character code or the %ESCAPES table, S<> protects its spaces, and
# B<>, C<>, F<>, I<>, and L<> are dispatched to seq_b, seq_c, seq_f, seq_i,
# and seq_l. Every path returns a defined string so that nothing spurious
# ends up in the interpolated text.
sub interior_sequence {
    my $self = shift;
    my $command = shift;
    local $_ = shift;
    return '' if ($command eq 'X' || $command eq 'Z');

    # Expand escapes into the actual character now, warning if invalid. An
    # unknown escape is left in the text as written.
    if ($command eq 'E') {
        return chr if /^\d+$/;
        return $ESCAPES{$_} if defined $ESCAPES{$_};
        my $seq = shift;
        my ($file, $line) = $seq->file_line;
        warn "$file:$line: Unknown escape: E<$_>\n";
        return "E<$_>";
    }

    # For all the other sequences, empty content produces no output. Return
    # an empty string rather than undef so that callers can concatenate it.
    return '' if $_ eq '';

    # For S<>, compress all internal whitespace and then map spaces to \01.
    # When we output the text, we'll map this back.
    if ($command eq 'S') {
        s/\s{2,}/ /g;
        tr/ /\01/;
        return $_;
    }

    # Anything else needs to get dispatched to another method.
    if    ($command eq 'B') { return $self->seq_b ($_) }
    elsif ($command eq 'C') { return $self->seq_c ($_) }
    elsif ($command eq 'F') { return $self->seq_f ($_) }
    elsif ($command eq 'I') { return $self->seq_i ($_) }
    elsif ($command eq 'L') { return $self->seq_l ($_) }

    # Unknown sequence: warn and keep the enclosed text. Without an explicit
    # return, the value of warn would be returned and inserted into the
    # output in place of the text.
    my $seq = shift;
    my ($file, $line) = $seq->file_line;
    warn "$file:$line: Unknown sequence $command<$_>\n";
    return $_;
}
f02a87df 348
# Hook run on each paragraph before it is processed further. Every run of
# tabs is expanded to spaces, advancing to the next multiple of eight
# columns. If the parser reports that it is currently cutting (the paragraph
# is not part of the POD, which we only see when the code option was given),
# the expanded paragraph is also passed to output_code. Returns the expanded
# paragraph.
sub preprocess_paragraph {
    my $self = shift;
    local $_ = shift;

    # Expand one run of tabs per pass; the width of each expansion depends
    # on how much text precedes it on the line, so the runs have to be
    # handled left to right.
    while (s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me) {
    }

    if ($self->cutting) {
        $self->output_code ($_);
    }
    return $_;
}
3ec07288 360
69e00e79 361
3c014959 362##############################################################################
6055f9d4 363# Command paragraphs
3c014959 364##############################################################################
f2506fb2 365
6055f9d4 366# All command paragraphs take the paragraph and the line number.
69e00e79 367
# First level heading. Takes the heading text and the line number. The
# heading is output at the left edge; in the alternate format it is wrapped
# in "====" markers, and otherwise the loose option adds a blank line after
# it.
sub cmd_head1 {
    my $self = shift;
    local $_ = shift;
    s/\s+$//;
    $_ = $self->interpolate ($_, shift);
    my $heading;
    if ($self->{alt}) {
        $heading = "\n==== $_ ====\n\n";
    } elsif ($self->{loose}) {
        $heading = $_ . "\n" . "\n";
    } else {
        $heading = $_ . "\n";
    }
    $self->output ($heading);
}
69e00e79 381
# Second level heading. Takes the heading text and the line number. The
# heading is indented by half of the configured indent and followed by a
# blank line; in the alternate format it is wrapped in "==" markers instead.
sub cmd_head2 {
    my $self = shift;
    local $_ = shift;
    s/\s+$//;
    $_ = $self->interpolate ($_, shift);
    my $heading;
    if ($self->{alt}) {
        $heading = "\n== $_ ==\n\n";
    } else {
        my $pad = ' ' x ($self->{indent} / 2);
        $heading = $pad . $_ . "\n\n";
    }
    $self->output ($heading);
}
69e00e79 394
# Third level heading. Takes the heading text and the line number. The
# heading is indented by two thirds of the configured indent (rounded) and
# followed by a blank line; in the alternate format it is wrapped in "="
# markers instead.
sub cmd_head3 {
    my $self = shift;
    local $_ = shift;
    s/\s+$//;
    $_ = $self->interpolate ($_, shift);
    my $heading;
    if ($self->{alt}) {
        $heading = "\n= $_ =\n\n";
    } else {
        # The repetition operator truncates, so adding 0.5 rounds.
        my $pad = ' ' x ($self->{indent} * 2 / 3 + 0.5);
        $heading = $pad . $_ . "\n\n";
    }
    $self->output ($heading);
}
407
# Fourth level heading. Takes the heading text and the line number. The
# heading is indented by three quarters of the configured indent (rounded)
# and followed by a blank line; in the alternate format it is wrapped in "-"
# markers instead.
sub cmd_head4 {
    my $self = shift;
    local $_ = shift;
    s/\s+$//;
    $_ = $self->interpolate ($_, shift);
    my $heading;
    if ($self->{alt}) {
        $heading = "\n- $_ -\n\n";
    } else {
        # The repetition operator truncates, so adding 0.5 rounds.
        my $pad = ' ' x ($self->{indent} * 3 / 4 + 0.5);
        $heading = $pad . $_ . "\n\n";
    }
    $self->output ($heading);
}
420
# Start a list. Takes the argument of =over, which is used as the additional
# indentation when it is an optionally signed integer and is otherwise
# replaced by the configured default indent. The current margin is pushed on
# the INDENTS stack so that cmd_back can restore it.
sub cmd_over {
    my $self = shift;
    local $_ = shift;

    # Trailing whitespace is optional: an argument that arrives without its
    # terminating newline is still an explicit indentation.
    unless (defined && /^[-+]?\d+\s*$/) { $_ = $$self{indent} }
    push (@{ $$self{INDENTS} }, $$self{MARGIN});
    $$self{MARGIN} += ($_ + 0);
}
69e00e79 429
# End a list. Restores the margin that cmd_over saved on the INDENTS stack.
# If the stack is empty, warns about the unmatched =back (with file and line
# taken from the paragraph object) and resets the margin to the configured
# indent.
sub cmd_back {
    my ($self, $text, $line, $paragraph) = @_;
    my $saved = $self->{INDENTS};
    $self->{MARGIN} = pop @$saved;
    unless (defined $self->{MARGIN}) {
        my ($file, $where) = $paragraph->file_line;
        warn "$file:$where: Unmatched =back\n";
        $self->{MARGIN} = $self->{indent};
    }
}
441
# An individual list item. Takes the item text and the line number. Any
# item still pending from a previous =item is flushed first; the new tag is
# then interpolated and stored in ITEM until its paragraph (or the next
# command) arrives. The line number is passed on to interpolate, as the
# heading handlers do, so that it is available when sequences in the tag are
# processed.
sub cmd_item {
    my $self = shift;
    if (defined $$self{ITEM}) { $self->item }
    local $_ = shift;
    s/\s+$//;
    $$self{ITEM} = $self->interpolate ($_, shift);
}
69e00e79 450
# Begin a block for a particular translator. The first word of the argument
# names the translator: "text" sets VERBATIM, which triggers special
# handling in textblock(), and anything else sets EXCLUDE. An argument with
# no word in it is ignored.
sub cmd_begin {
    my $self = shift;
    local $_ = shift;
    return unless /^(\S+)/;
    my $kind = $1;
    my $flag = ($kind eq 'text') ? 'VERBATIM' : 'EXCLUDE';
    $self->{$flag} = 1;
}
f2506fb2 463
# End a block for a particular translator. We assume that all =begin/=end
# pairs are properly closed, so both the VERBATIM and EXCLUDE flags are
# simply cleared.
sub cmd_end {
    my $self = shift;
    $self->{VERBATIM} = 0;
    $self->{EXCLUDE}  = 0;
}
6055f9d4 471
472# One paragraph for a particular translator. Ignore it unless it's intended
27f805f4 473# for text, in which case we treat it as a verbatim text block.
6055f9d4 474sub cmd_for {
475 my $self = shift;
476 local $_ = shift;
477 my $line = shift;
27f805f4 478 return unless s/^text\b[ \t]*\n?//;
479 $self->verbatim ($_, $line);
6055f9d4 480}
f2506fb2 481
69e00e79 482
3c014959 483##############################################################################
6055f9d4 484# Interior sequences
3c014959 485##############################################################################
69e00e79 486
# The simple formatting sequences. These are separate methods mostly so
# that subclasses can override them and do more complicated things.

# Bold text: returned unchanged, or wrapped in ``'' in the alternate format.
sub seq_b {
    my ($self, $text) = @_;
    return $text unless $self->{alt};
    return "``$text''";
}
# File name: returned unchanged, or wrapped in double quotes in the
# alternate format.
sub seq_f {
    my ($self, $text) = @_;
    return $text unless $self->{alt};
    return qq("$text");
}
# Italic text: always wrapped in asterisks.
sub seq_i {
    my ($self, $text) = @_;
    return "*$text*";
}
3c014959 492
# Format C<> text. Apply a whole bunch of messy heuristics to not quote
# things that don't benefit from being quoted: text that is already quoted,
# Perl variables, function calls with no argument or a single simple
# argument, and numbers are returned unchanged. Everything else is wrapped
# in the configured quotes (or ``'' in the alternate format). These
# heuristics originally come from Barrie Slaymaker and largely duplicate
# code in Pod::Man.
sub seq_c {
    my $self = shift;
    local $_ = shift;

    # A regex that matches the portion of a variable reference that's the
    # array or hash index, separated out just because we want to use it in
    # several places in the following regex.
    my $index = '(?: \[.*\] | \{.*\} )?';

    # Check for things that we don't want to quote, and if we find any of
    # them, return the string unchanged. The function call alternative
    # allows any run of characters other than whitespace and commas between
    # the parentheses, including none at all, so that both foo() and
    # foo($bar) are recognized.
    m{
      ^\s*
      (?:
         ( [\'\`\"] ) .* \1                             # already quoted
       | \` .* \'                                       # `quoted'
       | \$+ [\#^]? \S $index                           # special ($^Foo, $")
       | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
       | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]*\s*\) # 0/1-arg func call
       | [+-]? [\d.]+ (?: [eE] [+-]? \d+ )?             # a number
       | 0x [a-fA-F\d]+                                 # a hex constant
      )
      \s*\z
     }xo && return $_;

    # If we didn't return, go ahead and quote the text.
    return $$self{alt} ? "``$_''" : "$$self{LQUOTE}$_$$self{RQUOTE}";
}
69e00e79 524
# The complicated one. Handle links. Since this is plain text, we can't
# actually make any real links, so all this does is work out what text to
# print for the link. Takes the content of the L<> sequence and returns the
# replacement text.
sub seq_l {
    my $self = shift;
    local $_ = shift;

    # Smash whitespace in case we were split across multiple lines.
    s/\s+/ /g;

    # If we were given any explicit text, just output it.
    return $1 if /^([^|]+)\|/;

    # Okay, leading and trailing whitespace isn't important; get rid of it.
    s/^\s+//;
    s/\s+$//;

    # If the argument looks like a URL, return it verbatim. This only
    # handles URLs that use the server syntax.
    return $_ if m%^[a-z]+://\S+$%;

    # Default to using the whole content of the link entry as a section
    # name. Note that L<manpage/> forces a manpage interpretation, as does
    # something looking like L<manpage(section)>. The latter is an
    # enhancement over the original Pod::Text.
    my $manpage = '';
    my $section = $_;
    if (/^"\s*(.*?)\s*"$/) {
        $section = '"' . $1 . '"';
    } elsif (m/^[-:.\w]+(?:\(\S+\))?$/) {
        $manpage = $_;
        $section = '';
    } elsif (m%/%) {
        ($manpage, $section) = split (/\s*\/\s*/, $_, 2);
    }

    # Now build the actual output text. A link with no section names just
    # the manpage, if there is one.
    my $has_manpage = length ($manpage);
    unless (length ($section)) {
        return $has_manpage ? "the $manpage manpage" : '';
    }

    # A section starting with a word or function name is called an entry.
    if ($section =~ /^[:\w]+(?:\(\))?/) {
        my $where = $has_manpage ? " in the $manpage manpage"
                                 : " elsewhere in this document";
        return 'the ' . $section . ' entry' . $where;
    }

    # Anything else is a section heading, shown in double quotes.
    $section =~ s/^\"\s*//;
    $section =~ s/\s*\"$//;
    my $text = 'the section on "' . $section . '"';
    $text .= " in the $manpage manpage" if $has_manpage;
    return $text;
}
575
6055f9d4 576
3c014959 577##############################################################################
6055f9d4 578# List handling
3c014959 579##############################################################################
580
# This method is called whenever an =item command is complete (in other
# words, we've seen its associated paragraph or know for certain that it
# doesn't have one). It gets the paragraph associated with the item as an
# argument. If that paragraph is empty or only whitespace, or the tag does
# not fit in the space between the list's indentation and the current
# margin, the tag is output on a line of its own followed by the paragraph,
# if any. Otherwise the tag is written into the margin of the paragraph's
# first line. Carps and returns if no tag is pending.
sub item {
    my $self = shift;
    local $_ = shift;

    my $tag = $self->{ITEM};
    unless (defined $tag) {
        carp "Item called without tag";
        return;
    }
    undef $self->{ITEM};

    # The tag is placed at the indentation of the enclosing list, or at the
    # default indent if we are not inside a list.
    my $indent = $self->{INDENTS}[-1];
    $indent = $self->{indent} unless defined $indent;
    my $space = ' ' x $indent;
    $space =~ s/^ /:/ if $self->{alt};

    my $room = $self->{MARGIN} - $indent;
    if (!$_ || /^\s+$/ || ($room < length ($tag) + 1)) {
        # Format the tag at the list's indentation, then put the margin back
        # before formatting the paragraph itself.
        my $saved = $self->{MARGIN};
        $self->{MARGIN} = $indent;
        my $line = $self->reformat ($tag);
        $line =~ s/\n*$/\n/;
        $self->output ($line);
        $self->{MARGIN} = $saved;
        $self->output ($self->reformat ($_)) if /\S/;
    } else {
        # Overwrite part of the first line's leading spaces with the tag.
        $_ = $self->reformat ($_);
        s/^ /:/ if ($self->{alt} && $indent > 0);
        my $blank = ' ' x length $tag;
        s/^($space)$blank/$1$tag/ or warn "Bizarre space in item";
        $self->output ($_);
    }
}
617
69e00e79 618
3c014959 619##############################################################################
6055f9d4 620# Output formatting
3c014959 621##############################################################################
69e00e79 622
# Wrap a line, indenting by the current left margin. We can't use Text::Wrap
# because it plays games with tabs. We can't use formline, even though we'd
# really like to, because it screws up non-printing characters. So we have
# to do the wrapping ourselves. Takes the text to wrap and returns the
# wrapped text, with every line prefixed by the margin and the whole block
# ending in a blank line.
sub wrap {
    my $self = shift;
    local $_ = shift;
    my $output = '';
    my $spaces = ' ' x $$self{MARGIN};
    my $width = $$self{width} - $$self{MARGIN};

    # Only wrap when there is at least one column available. With a width
    # of zero the fallback substitution below would match without consuming
    # anything and the loop would never finish, so in that case the text is
    # left unwrapped.
    while ($width > 0 && length ($_) > $width) {
        # Break at the last whitespace that fits, or failing that in the
        # middle of an overlong word.
        if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
            $output .= $spaces . $1 . "\n";
        } else {
            last;
        }
    }
    $output .= $spaces . $_;
    $output =~ s/\s+$/\n\n/;
    $output;
}
644
# Reformat a paragraph of text for the current margin. Takes the text to
# reformat and returns the formatted text, as produced by wrap().
sub reformat {
    my $self = shift;
    local $_ = shift;

    # If we're trying to preserve two spaces after sentences, do some
    # munging to support that. Otherwise, smash all repeated whitespace.
    if ($$self{sentence}) {
        s/ +$//mg;          # Drop trailing spaces on each line.
        s/\.\n/. \n/g;      # A line ending in a period gets an extra space...
        s/\n/ /g;           # ...so joining the lines leaves two after it.

        # Reduce only runs of three or more spaces, and reduce them to two.
        # Collapsing every run to a single space would throw away exactly
        # the sentence spacing this branch is meant to keep.
        s/ {3,}/  /g;
    } else {
        s/\s+/ /g;
    }
    $self->wrap ($_);
}
663
# Output text to the output device. Takes the text to print, maps the \01
# characters that stand for protected spaces back to real spaces, and prints
# the result to the parser's output handle. The mapping is done on a copy so
# that the caller's variable is not modified through the argument alias.
sub output {
    my ($self, $text) = @_;
    $text =~ tr/\01/ /;
    print { $self->output_handle } $text;
}
69e00e79 666
# Output a block of code (something that isn't part of the POD text). Called
# by preprocess_paragraph only if we were given the code option. It simply
# forwards the text to output(), and exists only so that it can be
# overridden by subclasses.
sub output_code {
    my $self = shift;
    $self->output ($_[0]);
}
671
69e00e79 672
3c014959 673##############################################################################
27f805f4 674# Backwards compatibility
3c014959 675##############################################################################
27f805f4 676
# The old Pod::Text module did everything in a pod2text() function. This
# tries to provide the same interface for legacy applications. Takes
# optional leading flags followed by an input file and, optionally, an
# output file handle, and returns whatever the parser's parse method
# returns. Croaks if the input file cannot be opened.
sub pod2text {
    my @args;

    # This is really ugly; I hate doing option parsing in the middle of a
    # module. But the old Pod::Text module supported passing flags to its
    # entry function, so handle -a and -<number>. Parsing stops at the first
    # argument that starts with a dash but is not one of those flags, which
    # is left in place.
    FLAG: while ($_[0] =~ /^-/) {
        if ($_[0] eq '-a') {
            shift;
            push (@args, alt => 1);
            next FLAG;
        }
        if ($_[0] =~ /^-(\d+)$/) {
            my $width = $1;
            shift;
            push (@args, width => $width);
            next FLAG;
        }
        last FLAG;
    }

    # Now that we know what arguments we're using, create the parser.
    my $parser = Pod::Text->new (@args);

    # With a single argument, both input and output are handled by name.
    return $parser->parse_from_file (@_) unless defined $_[1];

    # If two arguments were given, the second argument is going to be a file
    # handle. That means we want to call parse_from_filehandle(), which
    # means we need to turn the first argument into a file handle. Magic
    # (two-argument) open is used deliberately so that it will handle the
    # <&STDIN case automagically.
    my @fhs = @_;
    local *IN;
    open (IN, $fhs[0]) or croak ("Can't open $fhs[0] for reading: $!\n");
    $fhs[0] = \*IN;
    return $parser->parse_from_filehandle (@fhs);
}
715
716
3c014959 717##############################################################################
6055f9d4 718# Module return value and documentation
3c014959 719##############################################################################
69e00e79 720
6055f9d4 7211;
722__END__
69e00e79 723
6055f9d4 724=head1 NAME
69e00e79 725
6055f9d4 726Pod::Text - Convert POD data to formatted ASCII text
69e00e79 727
6055f9d4 728=head1 SYNOPSIS
69e00e79 729
6055f9d4 730 use Pod::Text;
731 my $parser = Pod::Text->new (sentence => 0, width => 78);
69e00e79 732
6055f9d4 733 # Read POD from STDIN and write to STDOUT.
734 $parser->parse_from_filehandle;
69e00e79 735
6055f9d4 736 # Read POD from file.pod and write to file.txt.
737 $parser->parse_from_file ('file.pod', 'file.txt');
69e00e79 738
6055f9d4 739=head1 DESCRIPTION
5491a304 740
27f805f4 741Pod::Text is a module that can convert documentation in the POD format (the
742preferred language for documenting Perl) into formatted ASCII. It uses no
743special formatting controls or codes whatsoever, and its output is therefore
744suitable for nearly any device.
69e00e79 745
27f805f4 746As a derived class from Pod::Parser, Pod::Text supports the same methods and
747interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
748new parser with C<Pod::Text-E<gt>new()> and then calls either
749parse_from_filehandle() or parse_from_file().
6055f9d4 750
27f805f4 751new() can take options, in the form of key/value pairs, that control the
6055f9d4 752behavior of the parser. The currently recognized options are:
753
754=over 4
755
756=item alt
757
758If set to a true value, selects an alternate output format that, among other
759things, uses a different heading style and marks C<=item> entries with a
760colon in the left margin. Defaults to false.
761
59548eca 762=item code
763
764If set to a true value, the non-POD parts of the input file will be included
765in the output. Useful for viewing code documented with POD blocks with the
766POD rendered and the code left intact.
767
6055f9d4 768=item indent
769
770The number of spaces to indent regular text, and the default indentation for
771C<=over> blocks. Defaults to 4.
772
773=item loose
774
775If set to a true value, a blank line is printed after a C<=head1> heading.
776If set to false (the default), no blank line is printed after C<=head1>,
777although one is still printed after C<=head2>. This is the default because
778it's the expected formatting for manual pages; if you're formatting
779arbitrary text documents, setting this to true may result in more pleasing
780output.
781
ab1f1d91 782=item quotes
783
784Sets the quote marks used to surround CE<lt>> text. If the value is a
785single character, it is used as both the left and right quote; if it is two
786characters, the first character is used as the left quote and the second as
787the right quoted; and if it is four characters, the first two are used as
788the left quote and the second two as the right quote.
789
790This may also be set to the special value C<none>, in which case no quote
791marks are added around CE<lt>> text.
792
6055f9d4 793=item sentence
794
27f805f4 795If set to a true value, Pod::Text will assume that each sentence ends in two
796spaces, and will try to preserve that spacing. If set to false, all
6055f9d4 797consecutive whitespace in non-verbatim paragraphs is compressed into a
single space. Defaults to false.
799
800=item width
801
802The column at which to wrap text on the right-hand side. Defaults to 76.
803
804=back
805
27f805f4 806The standard Pod::Parser method parse_from_filehandle() takes up to two
6055f9d4 807arguments, the first being the file handle to read POD from and the second
808being the file handle to write the formatted output to. The first defaults
809to STDIN if not given, and the second defaults to STDOUT. The method
27f805f4 810parse_from_file() is almost identical, except that its two arguments are the
811input and output disk files instead. See L<Pod::Parser> for the specific
812details.
6055f9d4 813
814=head1 DIAGNOSTICS
815
816=over 4
817
27f805f4 818=item Bizarre space in item
819
59548eca 820=item Item called without tag
821
822(W) Something has gone wrong in internal C<=item> processing. These
823messages indicate a bug in Pod::Text; you should never see them.
27f805f4 824
825=item Can't open %s for reading: %s
826
827(F) Pod::Text was invoked via the compatibility mode pod2text() interface
828and the input file it was given could not be opened.
829
ab1f1d91 830=item Invalid quote specification "%s"
831
832(F) The quote specification given (the quotes option to the constructor) was
833invalid. A quote specification must be one, two, or four characters long.
834
=item %s:%d: Unknown command paragraph "%s"

(W) The POD source contained a non-standard command paragraph (something of
the form C<=command args>) that Pod::Text didn't know about. It was ignored.
839
59548eca 840=item %s:%d: Unknown escape: %s
6055f9d4 841
27f805f4 842(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
843know about.
6055f9d4 844
=item %s:%d: Unknown sequence %s
6055f9d4 846
27f805f4 847(W) The POD source contained a non-standard internal sequence (something of
848the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
6055f9d4 849
59548eca 850=item %s:%d: Unmatched =back
6055f9d4 851
27f805f4 852(W) Pod::Text encountered a C<=back> command that didn't correspond to an
6055f9d4 853C<=over> command.
854
855=back
856
27f805f4 857=head1 RESTRICTIONS
858
859Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
860output, due to an internal implementation detail.
861
6055f9d4 862=head1 NOTES
863
27f805f4 864This is a replacement for an earlier Pod::Text module written by Tom
865Christiansen. It has a revamped interface, since it now uses Pod::Parser,
866but an interface roughly compatible with the old Pod::Text::pod2text()
867function is still available. Please change to the new calling convention,
868though.
6055f9d4 869
870The original Pod::Text contained code to do formatting via termcap
871sequences, although it wasn't turned on by default and it was problematic to
27f805f4 872get it to work at all. This rewrite doesn't even try to do that, but a
873subclass of it does. Look for L<Pod::Text::Termcap|Pod::Text::Termcap>.
6055f9d4 874
875=head1 SEE ALSO
876
27f805f4 877L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>,
878pod2text(1)
6055f9d4 879
880=head1 AUTHOR
881
882Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
883original Pod::Text by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> and
884its conversion to Pod::Parser by Brad Appleton
885E<lt>bradapp@enteract.comE<gt>.
886
3c014959 887=head1 COPYRIGHT AND LICENSE
888
889Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>.
890
891This program is free software; you may redistribute it and/or modify it
892under the same terms as Perl itself.
893
6055f9d4 894=cut