1 # Pod::Text -- Convert POD data to formatted ASCII text.
2 # $Id: Text.pm,v 0.2 1999/06/13 02:44:01 eagle Exp $
4 # Copyright 1999 by Russ Allbery <rra@stanford.edu>
6 # This program is free software; you can redistribute it and/or modify it
7 # under the same terms as Perl itself.
9 # This module may potentially be a replacement for Pod::Text, although it
10 # does not (at the current time) attempt to match the output of Pod::Text
11 # and makes several different formatting choices (mostly in the direction of
12 # less markup). It uses Pod::Parser and is designed to be very easy to
15 ############################################################################
16 # Modules and declarations
17 ############################################################################
27 use vars qw(@ISA %ESCAPES $VERSION);
29 @ISA = qw(Pod::Parser);
34 ############################################################################
35 # Table of supported E<> escapes
36 ############################################################################
38 # This table is taken near verbatim from Pod::PlainText in Pod::Parser,
39 # which got it near verbatim from Pod::Text. It is therefore credited to
40 # Tom Christiansen, and I'm glad I didn't have to write it. :)
42 'amp' => '&', # ampersand
43 'lt' => '<', # left chevron, less-than
44 'gt' => '>', # right chevron, greater-than
45 'quot' => '"', # double quote
47 "Aacute" => "\xC1", # capital A, acute accent
48 "aacute" => "\xE1", # small a, acute accent
49 "Acirc" => "\xC2", # capital A, circumflex accent
50 "acirc" => "\xE2", # small a, circumflex accent
51 "AElig" => "\xC6", # capital AE diphthong (ligature)
52 "aelig" => "\xE6", # small ae diphthong (ligature)
53 "Agrave" => "\xC0", # capital A, grave accent
54 "agrave" => "\xE0", # small a, grave accent
55 "Aring" => "\xC5", # capital A, ring
56 "aring" => "\xE5", # small a, ring
57 "Atilde" => "\xC3", # capital A, tilde
58 "atilde" => "\xE3", # small a, tilde
59 "Auml" => "\xC4", # capital A, dieresis or umlaut mark
60 "auml" => "\xE4", # small a, dieresis or umlaut mark
61 "Ccedil" => "\xC7", # capital C, cedilla
62 "ccedil" => "\xE7", # small c, cedilla
63 "Eacute" => "\xC9", # capital E, acute accent
64 "eacute" => "\xE9", # small e, acute accent
65 "Ecirc" => "\xCA", # capital E, circumflex accent
66 "ecirc" => "\xEA", # small e, circumflex accent
67 "Egrave" => "\xC8", # capital E, grave accent
68 "egrave" => "\xE8", # small e, grave accent
69 "ETH" => "\xD0", # capital Eth, Icelandic
70 "eth" => "\xF0", # small eth, Icelandic
71 "Euml" => "\xCB", # capital E, dieresis or umlaut mark
72 "euml" => "\xEB", # small e, dieresis or umlaut mark
73 "Iacute" => "\xCD", # capital I, acute accent
74 "iacute" => "\xED", # small i, acute accent
75 "Icirc" => "\xCE", # capital I, circumflex accent
76 "icirc" => "\xEE", # small i, circumflex accent
77 "Igrave" => "\xCD", # capital I, grave accent
78 "igrave" => "\xED", # small i, grave accent
79 "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
80 "iuml" => "\xEF", # small i, dieresis or umlaut mark
81 "Ntilde" => "\xD1", # capital N, tilde
82 "ntilde" => "\xF1", # small n, tilde
83 "Oacute" => "\xD3", # capital O, acute accent
84 "oacute" => "\xF3", # small o, acute accent
85 "Ocirc" => "\xD4", # capital O, circumflex accent
86 "ocirc" => "\xF4", # small o, circumflex accent
87 "Ograve" => "\xD2", # capital O, grave accent
88 "ograve" => "\xF2", # small o, grave accent
89 "Oslash" => "\xD8", # capital O, slash
90 "oslash" => "\xF8", # small o, slash
91 "Otilde" => "\xD5", # capital O, tilde
92 "otilde" => "\xF5", # small o, tilde
93 "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
94 "ouml" => "\xF6", # small o, dieresis or umlaut mark
95 "szlig" => "\xDF", # small sharp s, German (sz ligature)
96 "THORN" => "\xDE", # capital THORN, Icelandic
97 "thorn" => "\xFE", # small thorn, Icelandic
98 "Uacute" => "\xDA", # capital U, acute accent
99 "uacute" => "\xFA", # small u, acute accent
100 "Ucirc" => "\xDB", # capital U, circumflex accent
101 "ucirc" => "\xFB", # small u, circumflex accent
102 "Ugrave" => "\xD9", # capital U, grave accent
103 "ugrave" => "\xF9", # small u, grave accent
104 "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
105 "uuml" => "\xFC", # small u, dieresis or umlaut mark
106 "Yacute" => "\xDD", # capital Y, acute accent
107 "yacute" => "\xFD", # small y, acute accent
108 "yuml" => "\xFF", # small y, dieresis or umlaut mark
110 "lchevron" => "\xAB", # left chevron (double less than)
111 "rchevron" => "\xBB", # right chevron (double greater than)
115 ############################################################################
117 ############################################################################
119 # Initialize the object. Must be sure to call our parent initializer.
123 $$self{alt} = 0 unless defined $$self{alt};
124 $$self{indent} = 4 unless defined $$self{indent};
125 $$self{loose} = 0 unless defined $$self{loose};
126 $$self{sentence} = 0 unless defined $$self{sentence};
127 $$self{width} = 76 unless defined $$self{width};
129 $$self{BEGUN} = []; # Stack of =begin blocks.
130 $$self{INDENTS} = []; # Stack of indentations.
131 $$self{MARGIN} = $$self{indent}; # Current left margin in spaces.
133 $self->SUPER::initialize;
137 ############################################################################
139 ############################################################################
141 # Called for each command paragraph. Gets the command, the associated
142 # paragraph, the line number, and a Pod::Paragraph object. Just dispatches
143 # the command to a method named the same as the command. =cut is handled
144 # internally by Pod::Parser.
148 return if $command eq 'pod';
149 return if ($$self{EXCLUDE} && $command ne 'end');
150 $self->item ("\n") if defined $$self{ITEM};
151 $command = 'cmd_' . $command;
152 $self->$command (@_);
155 # Called for a verbatim paragraph. Gets the paragraph, the line number, and
156 # a Pod::Paragraph object. Just output it verbatim, but with tabs converted
160 return if $$self{EXCLUDE};
161 $self->item if defined $$self{ITEM};
164 s/^(\s*\S+)/(' ' x $$self{MARGIN}) . $1/gme;
168 # Called for a regular text block. Gets the paragraph, the line number, and
169 # a Pod::Paragraph object. Perform interpolation and output the results.
171 my ($self, $text, $line) = @_;
172 return if $$self{EXCLUDE};
175 # Perform a little magic to collapse multiple L<> references. This is
176 # here mostly for backwards-compatibility with Pod::Text. We'll just
177 # rewrite the whole thing into actual text at this part, bypassing the
178 # whole internal sequence parsing thing.
181 L< # A link of the form L</something>.
184 [:\w]+ # The item has to be a simple word...
185 (\(\))? # ...or simple function.
189 ,?\s+(and\s+)? # Allow lots of them, conjuncted.
202 my @items = split /(?:,?\s+(?:and\s+)?)/;
205 for ($i = 0; $i < @items; $i++) {
206 $string .= $items[$i];
207 $string .= ", " if @items > 2 && $i != $#items;
208 $string .= " and " if ($i == $#items - 1);
210 $string .= " entries elsewhere in this document";
214 # Now actually interpolate and output the paragraph.
215 $_ = $self->interpolate ($_, $line);
217 if (defined $$self{ITEM}) {
218 $self->item ($_ . "\n");
220 $self->output ($self->reformat ($_ . "\n"));
224 # Called for an interior sequence. Gets the command, argument, and a
225 # Pod::InteriorSequence object and is expected to return the resulting text.
226 # Calls code, bold, italic, file, and link to handle those types of
227 # sequences, and handles S<>, E<>, X<>, and Z<> directly.
228 sub interior_sequence {
232 return '' if ($command eq 'X' || $command eq 'Z');
234 # Expand escapes into the actual character now, carping if invalid.
235 if ($command eq 'E') {
236 return $ESCAPES{$_} if defined $ESCAPES{$_};
237 carp "Unknown escape: E<$_>";
241 # For all the other sequences, empty content produces no output.
244 # For S<>, compress all internal whitespace and then map spaces to \01.
245 # When we output the text, we'll map this back.
246 if ($command eq 'S') {
252 # Anything else needs to get dispatched to another method.
253 if ($command eq 'B') { return $self->seq_b ($_) }
254 elsif ($command eq 'C') { return $self->seq_c ($_) }
255 elsif ($command eq 'F') { return $self->seq_f ($_) }
256 elsif ($command eq 'I') { return $self->seq_i ($_) }
257 elsif ($command eq 'L') { return $self->seq_l ($_) }
258 else { carp "Unknown sequence $command<$_>" }
261 # Called for each paragraph that's actually part of the POD. We take
262 # advantage of this opportunity to untabify the input.
263 sub preprocess_paragraph {
266 1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
271 ############################################################################
273 ############################################################################
275 # All command paragraphs take the paragraph and the line number.
277 # First level heading.
283 $self->output ("\n==== $_ ====\n\n");
285 $_ .= "\n" if $$self{loose};
286 $self->output ($_ . "\n");
290 # Second level heading.
296 $self->output ("\n== $_ ==\n\n");
298 $self->output (' ' x ($$self{indent} / 2) . $_ . "\n\n");
306 unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
307 push (@{ $$self{INDENTS} }, $$self{MARGIN});
308 $$self{MARGIN} += ($_ + 0);
314 $$self{MARGIN} = pop @{ $$self{INDENTS} };
315 unless (defined $$self{MARGIN}) {
316 carp "Unmatched =back";
317 $$self{MARGIN} = $$self{indent};
321 # An individual list item.
324 if (defined $$self{ITEM}) { $self->item }
327 $$self{ITEM} = $self->interpolate ($_);
330 # Begin a block for a particular translator. To allow for weird nested
331 # =begin blocks, keep track of how many blocks we were excluded from and
332 # only unwind one level with each =end.
336 my ($kind) = /^(\S+)/ or return;
337 push (@{ $$self{BEGUN} }, $kind);
338 $$self{EXCLUDE}++ unless $kind eq 'text';
341 # End a block for a particular translator. We assume that all =begin/=end
342 # pairs are properly nested and just pop the previous one.
345 my $kind = pop @{ $$self{BEGUN} };
346 $$self{EXCLUDE}-- if $$self{EXCLUDE};
349 # One paragraph for a particular translator. Ignore it unless it's intended
350 # for text, in which case we treat it as either a normal text block or a
351 # verbatim text block, depending on whether it's indented.
356 return unless s/^text\b[ \t]*//;
358 $self->verbatim ($_, $line);
360 $self->textblock ($_, $line);
365 ############################################################################
367 ############################################################################
369 # The simple formatting ones. These are here mostly so that subclasses can
370 # override them and do more complicated things.
371 sub seq_b { my $self = shift; return $$self{alt} ? "``$_[0]''" : $_[0] }
372 sub seq_c { my $self = shift; return $$self{alt} ? "``$_[0]''" : "`$_[0]'" }
373 sub seq_f { my $self = shift; return $$self{alt} ? "\"$_[0]\"" : $_[0] }
374 sub seq_i { return '*' . $_[1] . '*' }
376 # The complicated one. Handle links. Since this is plain text, we can't
377 # actually make any real links, so this is all to figure out what text we
383 # Smash whitespace in case we were split across multiple lines.
386 # If we were given any explicit text, just output it.
387 if (/^([^|]+)\|/) { return $1 }
389 # Okay, leading and trailing whitespace isn't important; get rid of it.
394 # Default to using the whole content of the link entry as a section
395 # name. Note that L<manpage/> forces a manpage interpretation, as does
396 # something looking like L<manpage(section)>. The latter is an
397 # enhancement over the original Pod::Text.
398 my ($manpage, $section) = ('', $_);
399 if (/^"\s*(.*?)\s*"$/) {
400 $section = '"' . $1 . '"';
401 } elsif (m/^[-:.\w]+(?:\(\S+\))?$/) {
402 ($manpage, $section) = ($_, '');
404 ($manpage, $section) = split (/\s*\/\s*/, $_, 2);
407 # Now build the actual output text.
409 if (!length $section) {
410 $text = "the $manpage manpage" if length $manpage;
411 } elsif ($section =~ /^[:\w]+(?:\(\))?/) {
412 $text .= 'the ' . $section . ' entry';
413 $text .= (length $manpage) ? " in the $manpage manpage"
414 : " elsewhere in this document";
416 $section =~ s/^\"\s*//;
417 $section =~ s/\s*\"$//;
418 $text .= 'the section on "' . $section . '"';
419 $text .= " in the $manpage manpage" if length $manpage;
425 ############################################################################
427 ############################################################################
429 # This method is called whenever an =item command is complete (in other
430 # words, we've seen its associated paragraph or know for certain that it
431 # doesn't have one). It gets the paragraph associated with the item as an
432 # argument. If that argument is empty, just output the item tag; if it
433 # contains a newline, output the item tag followed by the newline.
434 # Otherwise, see if there's enough room for us to output the item tag in the
435 # margin of the text or if we have to put it on a separate line.
439 my $tag = $$self{ITEM};
440 unless (defined $tag) {
441 carp "item called without tag";
445 my $indent = $$self{INDENTS}[-1];
446 unless (defined $indent) { $indent = $$self{indent} }
447 my $space = ' ' x $indent;
448 $space =~ s/^ /:/ if $$self{alt};
449 if (!$_ || /^\s+$/ || ($$self{MARGIN} - $indent < length ($tag) + 1)) {
450 $self->output ($space . $tag . "\n");
451 $self->output ($self->reformat ($_)) if /\S/;
453 $_ = $self->reformat ($_);
454 s/^ /:/ if ($$self{alt} && $indent > 0);
455 my $tagspace = ' ' x length $tag;
456 s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
462 ############################################################################
464 ############################################################################
466 # Wrap a line, indenting by the current left margin. We can't use
467 # Text::Wrap because it plays games with tabs. We can't use formline, even
468 # though we'd really like to, because it screws up non-printing characters.
469 # So we have to do the wrapping ourselves.
474 my $spaces = ' ' x $$self{MARGIN};
475 my $width = $$self{width} - $$self{MARGIN};
476 while (length > $width) {
477 if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
478 $output .= $spaces . $1 . "\n";
483 $output .= $spaces . $_;
484 $output =~ s/\s+$/\n\n/;
488 # Reformat a paragraph of text for the current margin. Takes the text to
489 # reformat and returns the formatted text.
494 # If we're trying to preserve two spaces after sentences, do some
495 # munging to support that. Otherwise, smash all repeated whitespace.
496 if ($$self{sentence}) {
507 # Output text to the output device.
508 sub output { $_[1] =~ tr/\01/ /; print { $_[0]->output_handle } $_[1] }
511 ############################################################################
512 # Module return value and documentation
513 ############################################################################
520 Pod::Text - Convert POD data to formatted ASCII text
525 my $parser = Pod::Text->new (sentence => 0, width => 78);
527 # Read POD from STDIN and write to STDOUT.
528 $parser->parse_from_filehandle;
530 # Read POD from file.pod and write to file.txt.
531 $parser->parse_from_file ('file.pod', 'file.txt');
535 Pod::Text is a module that can convert documentation in the POD format
536 (such as can be found throughout the Perl distribution) into formatted
537 ASCII. It uses no special formatting controls or codes whatsoever, and its
538 output is therefore suitable for nearly any device.
540 As a derived class from Pod::Parser, Pod::Text supports the same
541 methods and interfaces. See L<Pod::Parser> for all the details; briefly,
542 one creates a new parser with C<Pod::Text-E<gt>new()> and then calls
543 either C<parse_from_filehandle()> or C<parse_from_file()>.
545 C<new()> can take options, in the form of key/value pairs, that control the
546 behavior of the parser. The currently recognized options are:
552 If set to a true value, selects an alternate output format that, among other
553 things, uses a different heading style and marks C<=item> entries with a
554 colon in the left margin. Defaults to false.
558 The number of spaces to indent regular text, and the default indentation for
559 C<=over> blocks. Defaults to 4.
563 If set to a true value, a blank line is printed after a C<=head1> heading.
564 If set to false (the default), no blank line is printed after C<=head1>,
565 although one is still printed after C<=head2>. This is the default because
566 it's the expected formatting for manual pages; if you're formatting
567 arbitrary text documents, setting this to true may result in more pleasing
572 If set to a true value, Pod::Text will assume that each sentence ends
573 in two spaces, and will try to preserve that spacing. If set to false, all
574 consecutive whitespace in non-verbatim paragraphs is compressed into a
575 single space. Defaults to true.
579 The column at which to wrap text on the right-hand side. Defaults to 76.
583 The standard Pod::Parser method C<parse_from_filehandle()> takes up to two
584 arguments, the first being the file handle to read POD from and the second
585 being the file handle to write the formatted output to. The first defaults
586 to STDIN if not given, and the second defaults to STDOUT. The method
587 C<parse_from_file()> is almost identical, except that its two arguments are
588 the input and output disk files instead. See L<Pod::Parser> for the
595 =item Unknown escape: %s
597 The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text
600 =item Unknown sequence: %s
602 The POD source contained a non-standard internal sequence (something of the
603 form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
605 =item Unmatched =back
607 Pod::Text encountered a C<=back> command that didn't correspond to an
614 I'm hoping this module will eventually replace Pod::Text in Perl core once
615 Pod::Parser has been added to Perl core. Accordingly, don't be surprised if
616 the name of this module changes to Pod::Text down the road.
618 The original Pod::Text contained code to do formatting via termcap
619 sequences, although it wasn't turned on by default and it was problematic to
620 get it to work at all. This module doesn't even try to do that, but a
621 subclass of it does. Look for Pod::Text::Termcap.
625 L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>
629 Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
630 original Pod::Text by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt> and
631 its conversion to Pod::Parser by Brad Appleton
632 E<lt>bradapp@enteract.comE<gt>.