More tests.
[p5sagit/p5-mst-13.2.git] / lib / Pod / Text.pm
CommitLineData
6055f9d4 1# Pod::Text -- Convert POD data to formatted ASCII text.
3c014959 2# $Id: Text.pm,v 2.11 2001/07/10 11:08:10 eagle Exp $
6055f9d4 3#
16ba52cf 4# Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>
6055f9d4 5#
3c014959 6# This program is free software; you may redistribute it and/or modify it
6055f9d4 7# under the same terms as Perl itself.
8#
3c014959 9# This module replaces the old Pod::Text that came with versions of Perl prior
10# to 5.6.0, and attempts to match its output except for some specific
11# circumstances where other decisions seemed to produce better output. It
12# uses Pod::Parser and is designed to be very easy to subclass.
13#
14# Perl core hackers, please note that this module is also separately
15# maintained outside of the Perl core as part of the podlators. Please send
16# me any patches at the address above in addition to sending them to the
17# standard Perl mailing lists.
6055f9d4 18
3c014959 19##############################################################################
6055f9d4 20# Modules and declarations
3c014959 21##############################################################################
69e00e79 22
6055f9d4 23package Pod::Text;
69e00e79 24
6055f9d4 25require 5.004;
26
27f805f4 27use Carp qw(carp croak);
2e20e14f 28use Exporter ();
27f805f4 29use Pod::Select ();
6055f9d4 30
31use strict;
2e20e14f 32use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
6055f9d4 33
3c014959 34# We inherit from Pod::Select instead of Pod::Parser so that we can be used by
35# Pod::Usage.
2e20e14f 36@ISA = qw(Pod::Select Exporter);
6055f9d4 37
2e20e14f 38# We have to export pod2text for backward compatibility.
39@EXPORT = qw(pod2text);
40
3c014959 41# Don't use the CVS revision as the version, since this module is also in Perl
42# core and too many things could munge CVS magic revision strings. This
43# number should ideally be the same as the CVS revision in podlators, however.
44$VERSION = 2.11;
6055f9d4 45
46
3c014959 47##############################################################################
6055f9d4 48# Table of supported E<> escapes
3c014959 49##############################################################################
6055f9d4 50
3c014959 51# This table is taken near verbatim from Pod::PlainText in Pod::Parser, which
52# got it near verbatim from the original Pod::Text. It is therefore credited
53# to Tom Christiansen, and I'm glad I didn't have to write it. :) "iexcl" to
54# "divide" added by Tim Jenness.
6055f9d4 55%ESCAPES = (
56 'amp' => '&', # ampersand
57 'lt' => '<', # left chevron, less-than
58 'gt' => '>', # right chevron, greater-than
59 'quot' => '"', # double quote
ab1f1d91 60 'sol' => '/', # solidus (forward slash)
be3174d2 61 'verbar' => '|', # vertical bar
5cdeb5a2 62
6055f9d4 63 "Aacute" => "\xC1", # capital A, acute accent
64 "aacute" => "\xE1", # small a, acute accent
65 "Acirc" => "\xC2", # capital A, circumflex accent
66 "acirc" => "\xE2", # small a, circumflex accent
67 "AElig" => "\xC6", # capital AE diphthong (ligature)
68 "aelig" => "\xE6", # small ae diphthong (ligature)
69 "Agrave" => "\xC0", # capital A, grave accent
70 "agrave" => "\xE0", # small a, grave accent
71 "Aring" => "\xC5", # capital A, ring
72 "aring" => "\xE5", # small a, ring
73 "Atilde" => "\xC3", # capital A, tilde
74 "atilde" => "\xE3", # small a, tilde
75 "Auml" => "\xC4", # capital A, dieresis or umlaut mark
76 "auml" => "\xE4", # small a, dieresis or umlaut mark
77 "Ccedil" => "\xC7", # capital C, cedilla
78 "ccedil" => "\xE7", # small c, cedilla
79 "Eacute" => "\xC9", # capital E, acute accent
80 "eacute" => "\xE9", # small e, acute accent
81 "Ecirc" => "\xCA", # capital E, circumflex accent
82 "ecirc" => "\xEA", # small e, circumflex accent
83 "Egrave" => "\xC8", # capital E, grave accent
84 "egrave" => "\xE8", # small e, grave accent
85 "ETH" => "\xD0", # capital Eth, Icelandic
86 "eth" => "\xF0", # small eth, Icelandic
87 "Euml" => "\xCB", # capital E, dieresis or umlaut mark
88 "euml" => "\xEB", # small e, dieresis or umlaut mark
ee89c1da 89 "Iacute" => "\xCD", # capital I, acute accent
90 "iacute" => "\xED", # small i, acute accent
6055f9d4 91 "Icirc" => "\xCE", # capital I, circumflex accent
92 "icirc" => "\xEE", # small i, circumflex accent
ee89c1da 93 "Igrave" => "\xCC", # capital I, grave accent
94 "igrave" => "\xEC", # small i, grave accent
6055f9d4 95 "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
96 "iuml" => "\xEF", # small i, dieresis or umlaut mark
97 "Ntilde" => "\xD1", # capital N, tilde
98 "ntilde" => "\xF1", # small n, tilde
99 "Oacute" => "\xD3", # capital O, acute accent
100 "oacute" => "\xF3", # small o, acute accent
101 "Ocirc" => "\xD4", # capital O, circumflex accent
102 "ocirc" => "\xF4", # small o, circumflex accent
103 "Ograve" => "\xD2", # capital O, grave accent
104 "ograve" => "\xF2", # small o, grave accent
105 "Oslash" => "\xD8", # capital O, slash
106 "oslash" => "\xF8", # small o, slash
107 "Otilde" => "\xD5", # capital O, tilde
108 "otilde" => "\xF5", # small o, tilde
109 "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
110 "ouml" => "\xF6", # small o, dieresis or umlaut mark
111 "szlig" => "\xDF", # small sharp s, German (sz ligature)
112 "THORN" => "\xDE", # capital THORN, Icelandic
113 "thorn" => "\xFE", # small thorn, Icelandic
114 "Uacute" => "\xDA", # capital U, acute accent
115 "uacute" => "\xFA", # small u, acute accent
116 "Ucirc" => "\xDB", # capital U, circumflex accent
117 "ucirc" => "\xFB", # small u, circumflex accent
118 "Ugrave" => "\xD9", # capital U, grave accent
119 "ugrave" => "\xF9", # small u, grave accent
120 "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
121 "uuml" => "\xFC", # small u, dieresis or umlaut mark
122 "Yacute" => "\xDD", # capital Y, acute accent
123 "yacute" => "\xFD", # small y, acute accent
124 "yuml" => "\xFF", # small y, dieresis or umlaut mark
5cdeb5a2 125
a3e04946 126 "laquo" => "\xAB", # left pointing double angle quotation mark
127 "lchevron" => "\xAB", # synonym (backwards compatibility)
128 "raquo" => "\xBB", # right pointing double angle quotation mark
129 "rchevron" => "\xBB", # synonym (backwards compatibility)
130
131 "iexcl" => "\xA1", # inverted exclamation mark
132 "cent" => "\xA2", # cent sign
133 "pound" => "\xA3", # (UK) pound sign
134 "curren" => "\xA4", # currency sign
135 "yen" => "\xA5", # yen sign
136 "brvbar" => "\xA6", # broken vertical bar
137 "sect" => "\xA7", # section sign
138 "uml" => "\xA8", # diaresis
139 "copy" => "\xA9", # Copyright symbol
140 "ordf" => "\xAA", # feminine ordinal indicator
141 "not" => "\xAC", # not sign
142 "shy" => "\xAD", # soft hyphen
143 "reg" => "\xAE", # registered trademark
144 "macr" => "\xAF", # macron, overline
145 "deg" => "\xB0", # degree sign
146 "plusmn" => "\xB1", # plus-minus sign
147 "sup2" => "\xB2", # superscript 2
148 "sup3" => "\xB3", # superscript 3
149 "acute" => "\xB4", # acute accent
150 "micro" => "\xB5", # micro sign
151 "para" => "\xB6", # pilcrow sign = paragraph sign
152 "middot" => "\xB7", # middle dot = Georgian comma
153 "cedil" => "\xB8", # cedilla
154 "sup1" => "\xB9", # superscript 1
155 "ordm" => "\xBA", # masculine ordinal indicator
156 "frac14" => "\xBC", # vulgar fraction one quarter
157 "frac12" => "\xBD", # vulgar fraction one half
158 "frac34" => "\xBE", # vulgar fraction three quarters
159 "iquest" => "\xBF", # inverted question mark
160 "times" => "\xD7", # multiplication sign
161 "divide" => "\xF7", # division sign
6055f9d4 162);
69e00e79 163
69e00e79 164
3c014959 165##############################################################################
6055f9d4 166# Initialization
3c014959 167##############################################################################
69e00e79 168
6055f9d4 169# Initialize the object. Must be sure to call our parent initializer.
170sub initialize {
171 my $self = shift;
69e00e79 172
6055f9d4 173 $$self{alt} = 0 unless defined $$self{alt};
174 $$self{indent} = 4 unless defined $$self{indent};
175 $$self{loose} = 0 unless defined $$self{loose};
176 $$self{sentence} = 0 unless defined $$self{sentence};
177 $$self{width} = 76 unless defined $$self{width};
69e00e79 178
ab1f1d91 179 # Figure out what quotes we'll be using for C<> text.
50a3fd2a 180 $$self{quotes} ||= '"';
ab1f1d91 181 if ($$self{quotes} eq 'none') {
182 $$self{LQUOTE} = $$self{RQUOTE} = '';
183 } elsif (length ($$self{quotes}) == 1) {
184 $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes};
185 } elsif ($$self{quotes} =~ /^(.)(.)$/
186 || $$self{quotes} =~ /^(..)(..)$/) {
187 $$self{LQUOTE} = $1;
188 $$self{RQUOTE} = $2;
189 } else {
190 croak qq(Invalid quote specification "$$self{quotes}");
191 }
192
6055f9d4 193 $$self{INDENTS} = []; # Stack of indentations.
194 $$self{MARGIN} = $$self{indent}; # Current left margin in spaces.
69e00e79 195
6055f9d4 196 $self->SUPER::initialize;
197}
69e00e79 198
69e00e79 199
3c014959 200##############################################################################
6055f9d4 201# Core overrides
3c014959 202##############################################################################
6055f9d4 203
204# Called for each command paragraph. Gets the command, the associated
205# paragraph, the line number, and a Pod::Paragraph object. Just dispatches
206# the command to a method named the same as the command. =cut is handled
207# internally by Pod::Parser.
208sub command {
209 my $self = shift;
210 my $command = shift;
211 return if $command eq 'pod';
212 return if ($$self{EXCLUDE} && $command ne 'end');
213 $self->item ("\n") if defined $$self{ITEM};
ab1f1d91 214 if ($self->can ('cmd_' . $command)) {
215 $command = 'cmd_' . $command;
216 $self->$command (@_);
217 } else {
218 my ($text, $line, $paragraph) = @_;
5cdeb5a2 219 my $file;
220 ($file, $line) = $paragraph->file_line;
ab1f1d91 221 $text =~ s/\n+\z//;
222 $text = " $text" if ($text =~ /^\S/);
223 warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
224 return;
225 }
6055f9d4 226}
69e00e79 227
3c014959 228# Called for a verbatim paragraph. Gets the paragraph, the line number, and a
229# Pod::Paragraph object. Just output it verbatim, but with tabs converted to
230# spaces.
6055f9d4 231sub verbatim {
232 my $self = shift;
233 return if $$self{EXCLUDE};
234 $self->item if defined $$self{ITEM};
235 local $_ = shift;
236 return if /^\s*$/;
237 s/^(\s*\S+)/(' ' x $$self{MARGIN}) . $1/gme;
238 $self->output ($_);
239}
69e00e79 240
3c014959 241# Called for a regular text block. Gets the paragraph, the line number, and a
242# Pod::Paragraph object. Perform interpolation and output the results.
6055f9d4 243sub textblock {
27f805f4 244 my $self = shift;
6055f9d4 245 return if $$self{EXCLUDE};
27f805f4 246 $self->output ($_[0]), return if $$self{VERBATIM};
247 local $_ = shift;
248 my $line = shift;
6055f9d4 249
250 # Perform a little magic to collapse multiple L<> references. This is
27f805f4 251 # here mostly for backwards-compatibility. We'll just rewrite the whole
252 # thing into actual text at this part, bypassing the whole internal
253 # sequence parsing thing.
6055f9d4 254 s{
255 (
256 L< # A link of the form L</something>.
257 /
258 (
259 [:\w]+ # The item has to be a simple word...
260 (\(\))? # ...or simple function.
261 )
262 >
263 (
264 ,?\s+(and\s+)? # Allow lots of them, conjuncted.
5cdeb5a2 265 L<
6055f9d4 266 /
267 (
268 [:\w]+
269 (\(\))?
270 )
271 >
272 )+
273 )
274 } {
275 local $_ = $1;
276 s%L</([^>]+)>%$1%g;
277 my @items = split /(?:,?\s+(?:and\s+)?)/;
278 my $string = "the ";
279 my $i;
280 for ($i = 0; $i < @items; $i++) {
281 $string .= $items[$i];
282 $string .= ", " if @items > 2 && $i != $#items;
283 $string .= " and " if ($i == $#items - 1);
284 }
285 $string .= " entries elsewhere in this document";
286 $string;
287 }gex;
288
289 # Now actually interpolate and output the paragraph.
290 $_ = $self->interpolate ($_, $line);
291 s/\s+$/\n/;
292 if (defined $$self{ITEM}) {
293 $self->item ($_ . "\n");
294 } else {
295 $self->output ($self->reformat ($_ . "\n"));
296 }
297}
69e00e79 298
6055f9d4 299# Called for an interior sequence. Gets the command, argument, and a
300# Pod::InteriorSequence object and is expected to return the resulting text.
3c014959 301# Calls code, bold, italic, file, and link to handle those types of sequences,
302# and handles S<>, E<>, X<>, and Z<> directly.
6055f9d4 303sub interior_sequence {
304 my $self = shift;
305 my $command = shift;
306 local $_ = shift;
307 return '' if ($command eq 'X' || $command eq 'Z');
69e00e79 308
6055f9d4 309 # Expand escapes into the actual character now, carping if invalid.
310 if ($command eq 'E') {
2e20e14f 311 if (/^\d+$/) {
312 return chr;
313 } else {
314 return $ESCAPES{$_} if defined $ESCAPES{$_};
315 carp "Unknown escape: E<$_>";
316 return "E<$_>";
317 }
6055f9d4 318 }
69e00e79 319
6055f9d4 320 # For all the other sequences, empty content produces no output.
27f805f4 321 return if $_ eq '';
69e00e79 322
6055f9d4 323 # For S<>, compress all internal whitespace and then map spaces to \01.
324 # When we output the text, we'll map this back.
325 if ($command eq 'S') {
326 s/\s{2,}/ /g;
327 tr/ /\01/;
328 return $_;
329 }
69e00e79 330
6055f9d4 331 # Anything else needs to get dispatched to another method.
332 if ($command eq 'B') { return $self->seq_b ($_) }
333 elsif ($command eq 'C') { return $self->seq_c ($_) }
334 elsif ($command eq 'F') { return $self->seq_f ($_) }
335 elsif ($command eq 'I') { return $self->seq_i ($_) }
336 elsif ($command eq 'L') { return $self->seq_l ($_) }
337 else { carp "Unknown sequence $command<$_>" }
338}
f02a87df 339
6055f9d4 340# Called for each paragraph that's actually part of the POD. We take
341# advantage of this opportunity to untabify the input.
342sub preprocess_paragraph {
343 my $self = shift;
344 local $_ = shift;
345 1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
346 $_;
347}
3ec07288 348
69e00e79 349
3c014959 350##############################################################################
6055f9d4 351# Command paragraphs
3c014959 352##############################################################################
f2506fb2 353
6055f9d4 354# All command paragraphs take the paragraph and the line number.
69e00e79 355
6055f9d4 356# First level heading.
357sub cmd_head1 {
358 my $self = shift;
359 local $_ = shift;
360 s/\s+$//;
27f805f4 361 $_ = $self->interpolate ($_, shift);
6055f9d4 362 if ($$self{alt}) {
363 $self->output ("\n==== $_ ====\n\n");
364 } else {
365 $_ .= "\n" if $$self{loose};
366 $self->output ($_ . "\n");
367 }
368}
69e00e79 369
6055f9d4 370# Second level heading.
371sub cmd_head2 {
372 my $self = shift;
373 local $_ = shift;
374 s/\s+$//;
27f805f4 375 $_ = $self->interpolate ($_, shift);
6055f9d4 376 if ($$self{alt}) {
377 $self->output ("\n== $_ ==\n\n");
378 } else {
379 $self->output (' ' x ($$self{indent} / 2) . $_ . "\n\n");
380 }
381}
69e00e79 382
50a3fd2a 383# Third level heading.
384sub cmd_head3 {
385 my $self = shift;
386 local $_ = shift;
387 s/\s+$//;
388 $_ = $self->interpolate ($_, shift);
389 if ($$self{alt}) {
390 $self->output ("\n= $_ =\n\n");
391 } else {
392 $self->output (' ' x ($$self{indent} * 2 / 3 + 0.5) . $_ . "\n\n");
393 }
394}
395
396# Third level heading.
397sub cmd_head4 {
398 my $self = shift;
399 local $_ = shift;
400 s/\s+$//;
401 $_ = $self->interpolate ($_, shift);
402 if ($$self{alt}) {
403 $self->output ("\n- $_ -\n\n");
404 } else {
405 $self->output (' ' x ($$self{indent} * 3 / 4 + 0.5) . $_ . "\n\n");
406 }
407}
408
6055f9d4 409# Start a list.
410sub cmd_over {
411 my $self = shift;
412 local $_ = shift;
413 unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
414 push (@{ $$self{INDENTS} }, $$self{MARGIN});
415 $$self{MARGIN} += ($_ + 0);
416}
69e00e79 417
6055f9d4 418# End a list.
419sub cmd_back {
420 my $self = shift;
421 $$self{MARGIN} = pop @{ $$self{INDENTS} };
422 unless (defined $$self{MARGIN}) {
423 carp "Unmatched =back";
424 $$self{MARGIN} = $$self{indent};
425 }
69e00e79 426}
427
6055f9d4 428# An individual list item.
429sub cmd_item {
430 my $self = shift;
431 if (defined $$self{ITEM}) { $self->item }
432 local $_ = shift;
433 s/\s+$//;
434 $$self{ITEM} = $self->interpolate ($_);
435}
69e00e79 436
27f805f4 437# Begin a block for a particular translator. Setting VERBATIM triggers
438# special handling in textblock().
6055f9d4 439sub cmd_begin {
440 my $self = shift;
441 local $_ = shift;
442 my ($kind) = /^(\S+)/ or return;
27f805f4 443 if ($kind eq 'text') {
444 $$self{VERBATIM} = 1;
445 } else {
446 $$self{EXCLUDE} = 1;
447 }
6055f9d4 448}
f2506fb2 449
6055f9d4 450# End a block for a particular translator. We assume that all =begin/=end
27f805f4 451# pairs are properly closed.
6055f9d4 452sub cmd_end {
453 my $self = shift;
27f805f4 454 $$self{EXCLUDE} = 0;
455 $$self{VERBATIM} = 0;
5cdeb5a2 456}
6055f9d4 457
458# One paragraph for a particular translator. Ignore it unless it's intended
27f805f4 459# for text, in which case we treat it as a verbatim text block.
6055f9d4 460sub cmd_for {
461 my $self = shift;
462 local $_ = shift;
463 my $line = shift;
27f805f4 464 return unless s/^text\b[ \t]*\n?//;
465 $self->verbatim ($_, $line);
6055f9d4 466}
f2506fb2 467
69e00e79 468
3c014959 469##############################################################################
6055f9d4 470# Interior sequences
3c014959 471##############################################################################
69e00e79 472
6055f9d4 473# The simple formatting ones. These are here mostly so that subclasses can
474# override them and do more complicated things.
27f805f4 475sub seq_b { return $_[0]{alt} ? "``$_[1]''" : $_[1] }
27f805f4 476sub seq_f { return $_[0]{alt} ? "\"$_[1]\"" : $_[1] }
6055f9d4 477sub seq_i { return '*' . $_[1] . '*' }
3c014959 478
479# Apply a whole bunch of messy heuristics to not quote things that don't
480# benefit from being quoted. These originally come from Barrie Slaymaker and
481# largely duplicate code in Pod::Man.
ab1f1d91 482sub seq_c {
3c014959 483 my $self = shift;
484 local $_ = shift;
485
486 # A regex that matches the portion of a variable reference that's the
487 # array or hash index, separated out just because we want to use it in
488 # several places in the following regex.
489 my $index = '(?: \[.*\] | \{.*\} )?';
490
491 # Check for things that we don't want to quote, and if we find any of
492 # them, return the string with just a font change and no quoting.
493 m{
494 ^\s*
495 (?:
496 ( [\'\`\"] ) .* \1 # already quoted
497 | \` .* \' # `quoted'
498 | \$+ [\#^]? \S $index # special ($^Foo, $")
499 | [\$\@%&*]+ \#? [:\'\w]+ $index # plain var or func
500 | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
501 | [+-]? [\d.]+ (?: [eE] [+-]? \d+ )? # a number
502 | 0x [a-fA-F\d]+ # a hex constant
503 )
504 \s*\z
505 }xo && return $_;
506
507 # If we didn't return, go ahead and quote the text.
508 return $$self{alt} ? "``$_''" : "$$self{LQUOTE}$_$$self{RQUOTE}";
ab1f1d91 509}
69e00e79 510
6055f9d4 511# The complicated one. Handle links. Since this is plain text, we can't
512# actually make any real links, so this is all to figure out what text we
513# print out.
514sub seq_l {
515 my $self = shift;
516 local $_ = shift;
69e00e79 517
6055f9d4 518 # Smash whitespace in case we were split across multiple lines.
519 s/\s+/ /g;
69e00e79 520
6055f9d4 521 # If we were given any explicit text, just output it.
522 if (/^([^|]+)\|/) { return $1 }
523
524 # Okay, leading and trailing whitespace isn't important; get rid of it.
525 s/^\s+//;
526 s/\s+$//;
6055f9d4 527
3c014959 528 # If the argument looks like a URL, return it verbatim. This only handles
529 # URLs that use the server syntax.
16ba52cf 530 if (m%^[a-z]+://\S+$%) { return $_ }
531
3c014959 532 # Default to using the whole content of the link entry as a section name.
533 # Note that L<manpage/> forces a manpage interpretation, as does something
534 # looking like L<manpage(section)>. The latter is an enhancement over the
535 # original Pod::Text.
6055f9d4 536 my ($manpage, $section) = ('', $_);
537 if (/^"\s*(.*?)\s*"$/) {
538 $section = '"' . $1 . '"';
539 } elsif (m/^[-:.\w]+(?:\(\S+\))?$/) {
540 ($manpage, $section) = ($_, '');
541 } elsif (m%/%) {
542 ($manpage, $section) = split (/\s*\/\s*/, $_, 2);
8c634b6e 543 }
544
6055f9d4 545 # Now build the actual output text.
546 my $text = '';
547 if (!length $section) {
548 $text = "the $manpage manpage" if length $manpage;
549 } elsif ($section =~ /^[:\w]+(?:\(\))?/) {
550 $text .= 'the ' . $section . ' entry';
551 $text .= (length $manpage) ? " in the $manpage manpage"
552 : " elsewhere in this document";
553 } else {
554 $section =~ s/^\"\s*//;
555 $section =~ s/\s*\"$//;
556 $text .= 'the section on "' . $section . '"';
557 $text .= " in the $manpage manpage" if length $manpage;
69e00e79 558 }
6055f9d4 559 $text;
69e00e79 560}
561
6055f9d4 562
3c014959 563##############################################################################
6055f9d4 564# List handling
3c014959 565##############################################################################
566
567# This method is called whenever an =item command is complete (in other words,
568# we've seen its associated paragraph or know for certain that it doesn't have
569# one). It gets the paragraph associated with the item as an argument. If
570# that argument is empty, just output the item tag; if it contains a newline,
571# output the item tag followed by the newline. Otherwise, see if there's
572# enough room for us to output the item tag in the margin of the text or if we
573# have to put it on a separate line.
6055f9d4 574sub item {
575 my $self = shift;
576 local $_ = shift;
577 my $tag = $$self{ITEM};
578 unless (defined $tag) {
579 carp "item called without tag";
580 return;
69e00e79 581 }
6055f9d4 582 undef $$self{ITEM};
583 my $indent = $$self{INDENTS}[-1];
584 unless (defined $indent) { $indent = $$self{indent} }
585 my $space = ' ' x $indent;
586 $space =~ s/^ /:/ if $$self{alt};
587 if (!$_ || /^\s+$/ || ($$self{MARGIN} - $indent < length ($tag) + 1)) {
27f805f4 588 my $margin = $$self{MARGIN};
589 $$self{MARGIN} = $indent;
590 my $output = $self->reformat ($tag);
591 $output =~ s/\n*$/\n/;
592 $self->output ($output);
593 $$self{MARGIN} = $margin;
6055f9d4 594 $self->output ($self->reformat ($_)) if /\S/;
595 } else {
596 $_ = $self->reformat ($_);
597 s/^ /:/ if ($$self{alt} && $indent > 0);
598 my $tagspace = ' ' x length $tag;
599 s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
600 $self->output ($_);
69e00e79 601 }
602}
603
69e00e79 604
3c014959 605##############################################################################
6055f9d4 606# Output formatting
3c014959 607##############################################################################
69e00e79 608
3c014959 609# Wrap a line, indenting by the current left margin. We can't use Text::Wrap
610# because it plays games with tabs. We can't use formline, even though we'd
611# really like to, because it screws up non-printing characters. So we have to
612# do the wrapping ourselves.
6055f9d4 613sub wrap {
614 my $self = shift;
615 local $_ = shift;
616 my $output = '';
617 my $spaces = ' ' x $$self{MARGIN};
618 my $width = $$self{width} - $$self{MARGIN};
619 while (length > $width) {
620 if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
621 $output .= $spaces . $1 . "\n";
622 } else {
623 last;
624 }
69e00e79 625 }
6055f9d4 626 $output .= $spaces . $_;
627 $output =~ s/\s+$/\n\n/;
628 $output;
69e00e79 629}
630
6055f9d4 631# Reformat a paragraph of text for the current margin. Takes the text to
632# reformat and returns the formatted text.
633sub reformat {
634 my $self = shift;
635 local $_ = shift;
69e00e79 636
3c014959 637 # If we're trying to preserve two spaces after sentences, do some munging
638 # to support that. Otherwise, smash all repeated whitespace.
6055f9d4 639 if ($$self{sentence}) {
640 s/ +$//mg;
641 s/\.\n/. \n/g;
642 s/\n/ /g;
643 s/ +/ /g;
69e00e79 644 } else {
6055f9d4 645 s/\s+/ /g;
69e00e79 646 }
6055f9d4 647 $self->wrap ($_);
69e00e79 648}
649
6055f9d4 650# Output text to the output device.
651sub output { $_[1] =~ tr/\01/ /; print { $_[0]->output_handle } $_[1] }
69e00e79 652
69e00e79 653
3c014959 654##############################################################################
27f805f4 655# Backwards compatibility
3c014959 656##############################################################################
27f805f4 657
658# The old Pod::Text module did everything in a pod2text() function. This
659# tries to provide the same interface for legacy applications.
660sub pod2text {
661 my @args;
662
663 # This is really ugly; I hate doing option parsing in the middle of a
664 # module. But the old Pod::Text module supported passing flags to its
665 # entry function, so handle -a and -<number>.
666 while ($_[0] =~ /^-/) {
667 my $flag = shift;
668 if ($flag eq '-a') { push (@args, alt => 1) }
669 elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
670 else {
671 unshift (@_, $flag);
672 last;
673 }
674 }
675
676 # Now that we know what arguments we're using, create the parser.
677 my $parser = Pod::Text->new (@args);
678
679 # If two arguments were given, the second argument is going to be a file
3c014959 680 # handle. That means we want to call parse_from_filehandle(), which means
681 # we need to turn the first argument into a file handle. Magic open will
682 # handle the <&STDIN case automagically.
27f805f4 683 if (defined $_[1]) {
ab1f1d91 684 my @fhs = @_;
27f805f4 685 local *IN;
ab1f1d91 686 unless (open (IN, $fhs[0])) {
687 croak ("Can't open $fhs[0] for reading: $!\n");
27f805f4 688 return;
689 }
ab1f1d91 690 $fhs[0] = \*IN;
691 return $parser->parse_from_filehandle (@fhs);
27f805f4 692 } else {
693 return $parser->parse_from_file (@_);
694 }
695}
696
697
3c014959 698##############################################################################
6055f9d4 699# Module return value and documentation
3c014959 700##############################################################################
69e00e79 701
6055f9d4 7021;
703__END__
69e00e79 704
6055f9d4 705=head1 NAME
69e00e79 706
6055f9d4 707Pod::Text - Convert POD data to formatted ASCII text
69e00e79 708
6055f9d4 709=head1 SYNOPSIS
69e00e79 710
6055f9d4 711 use Pod::Text;
712 my $parser = Pod::Text->new (sentence => 0, width => 78);
69e00e79 713
6055f9d4 714 # Read POD from STDIN and write to STDOUT.
715 $parser->parse_from_filehandle;
69e00e79 716
6055f9d4 717 # Read POD from file.pod and write to file.txt.
718 $parser->parse_from_file ('file.pod', 'file.txt');
69e00e79 719
6055f9d4 720=head1 DESCRIPTION
5491a304 721
27f805f4 722Pod::Text is a module that can convert documentation in the POD format (the
723preferred language for documenting Perl) into formatted ASCII. It uses no
724special formatting controls or codes whatsoever, and its output is therefore
725suitable for nearly any device.
69e00e79 726
27f805f4 727As a derived class from Pod::Parser, Pod::Text supports the same methods and
728interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
729new parser with C<Pod::Text-E<gt>new()> and then calls either
730parse_from_filehandle() or parse_from_file().
6055f9d4 731
27f805f4 732new() can take options, in the form of key/value pairs, that control the
6055f9d4 733behavior of the parser. The currently recognized options are:
734
735=over 4
736
737=item alt
738
739If set to a true value, selects an alternate output format that, among other
740things, uses a different heading style and marks C<=item> entries with a
741colon in the left margin. Defaults to false.
742
743=item indent
744
745The number of spaces to indent regular text, and the default indentation for
746C<=over> blocks. Defaults to 4.
747
748=item loose
749
750If set to a true value, a blank line is printed after a C<=head1> heading.
751If set to false (the default), no blank line is printed after C<=head1>,
752although one is still printed after C<=head2>. This is the default because
753it's the expected formatting for manual pages; if you're formatting
754arbitrary text documents, setting this to true may result in more pleasing
755output.
756
ab1f1d91 757=item quotes
758
759Sets the quote marks used to surround CE<lt>> text. If the value is a
760single character, it is used as both the left and right quote; if it is two
761characters, the first character is used as the left quote and the second as
762the right quoted; and if it is four characters, the first two are used as
763the left quote and the second two as the right quote.
764
765This may also be set to the special value C<none>, in which case no quote
766marks are added around CE<lt>> text.
767
6055f9d4 768=item sentence
769
27f805f4 770If set to a true value, Pod::Text will assume that each sentence ends in two
771spaces, and will try to preserve that spacing. If set to false, all
6055f9d4 772consecutive whitespace in non-verbatim paragraphs is compressed into a
773single space. Defaults to true.
774
775=item width
776
777The column at which to wrap text on the right-hand side. Defaults to 76.
778
779=back
780
27f805f4 781The standard Pod::Parser method parse_from_filehandle() takes up to two
6055f9d4 782arguments, the first being the file handle to read POD from and the second
783being the file handle to write the formatted output to. The first defaults
784to STDIN if not given, and the second defaults to STDOUT. The method
27f805f4 785parse_from_file() is almost identical, except that its two arguments are the
786input and output disk files instead. See L<Pod::Parser> for the specific
787details.
6055f9d4 788
789=head1 DIAGNOSTICS
790
791=over 4
792
27f805f4 793=item Bizarre space in item
794
795(W) Something has gone wrong in internal C<=item> processing. This message
796indicates a bug in Pod::Text; you should never see it.
797
798=item Can't open %s for reading: %s
799
800(F) Pod::Text was invoked via the compatibility mode pod2text() interface
801and the input file it was given could not be opened.
802
ab1f1d91 803=item Invalid quote specification "%s"
804
805(F) The quote specification given (the quotes option to the constructor) was
806invalid. A quote specification must be one, two, or four characters long.
807
808=item %s:%d: Unknown command paragraph "%s".
809
810(W) The POD source contained a non-standard command paragraph (something of
811the form C<=command args>) that Pod::Man didn't know about. It was ignored.
812
6055f9d4 813=item Unknown escape: %s
814
27f805f4 815(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
816know about.
6055f9d4 817
818=item Unknown sequence: %s
819
27f805f4 820(W) The POD source contained a non-standard internal sequence (something of
821the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
6055f9d4 822
823=item Unmatched =back
824
27f805f4 825(W) Pod::Text encountered a C<=back> command that didn't correspond to an
6055f9d4 826C<=over> command.
827
828=back
829
27f805f4 830=head1 RESTRICTIONS
831
832Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
833output, due to an internal implementation detail.
834
6055f9d4 835=head1 NOTES
836
27f805f4 837This is a replacement for an earlier Pod::Text module written by Tom
838Christiansen. It has a revamped interface, since it now uses Pod::Parser,
839but an interface roughly compatible with the old Pod::Text::pod2text()
840function is still available. Please change to the new calling convention,
841though.
6055f9d4 842
843The original Pod::Text contained code to do formatting via termcap
844sequences, although it wasn't turned on by default and it was problematic to
27f805f4 845get it to work at all. This rewrite doesn't even try to do that, but a
846subclass of it does. Look for L<Pod::Text::Termcap|Pod::Text::Termcap>.
6055f9d4 847
848=head1 SEE ALSO
849
27f805f4 850L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>,
851pod2text(1)
6055f9d4 852
853=head1 AUTHOR
854
855Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
856original Pod::Text by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> and
857its conversion to Pod::Parser by Brad Appleton
858E<lt>bradapp@enteract.comE<gt>.
859
3c014959 860=head1 COPYRIGHT AND LICENSE
861
862Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>.
863
864This program is free software; you may redistribute it and/or modify it
865under the same terms as Perl itself.
866
6055f9d4 867=cut