1 # Pod::Text -- Convert POD data to formatted ASCII text.
2 # $Id: Text.pm,v 3.1 2005/03/19 19:40:01 eagle Exp $
4 # Copyright 1999, 2000, 2001, 2002, 2004 by Russ Allbery <rra@stanford.edu>
6 # This program is free software; you may redistribute it and/or modify it
7 # under the same terms as Perl itself.
9 # This module converts POD to formatted text. It replaces the old Pod::Text
10 # module that came with versions of Perl prior to 5.6.0 and attempts to match
11 # its output except for some specific circumstances where other decisions
12 # seemed to produce better output. It uses Pod::Simple and is designed to be
13 # very easy to subclass.
15 # Perl core hackers, please note that this module is also separately
16 # maintained outside of the Perl core as part of the podlators. Please send
17 # me any patches at the address above in addition to sending them to the
18 # standard Perl mailing lists.
20 ##############################################################################
21 # Modules and declarations
22 ##############################################################################
29 use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
31 use Carp qw(carp croak);
35 @ISA = qw(Pod::Simple Exporter);
37 # We have to export pod2text for backward compatibility.
38 @EXPORT = qw(pod2text);
40 # Don't use the CVS revision as the version, since this module is also in Perl
41 # core and too many things could munge CVS magic revision strings. This
42 # number should ideally be the same as the CVS revision in podlators, however.
45 ##############################################################################
47 ##############################################################################
49 # This function handles code blocks. It's registered as a callback to
50 # Pod::Simple and therefore doesn't work as a regular method call, but all it
51 # does is call output_code with the line.
53 my ($line, $number, $parser) = @_;
54 $parser->output_code ($line . "\n");
57 # Initialize the object and set various Pod::Simple options that we need.
58 # Here, we also process any additional options passed to the constructor or
59 # set up defaults if none were given. Note that all internal object keys are
60 # in all-caps, reserving all lower-case object keys for Pod::Simple and user
64 my $self = $class->SUPER::new;
66 # Tell Pod::Simple to handle S<> by automatically inserting non-breaking spaces.
67 $self->nbsp_for_S (1);
69 # Tell Pod::Simple to keep whitespace whenever possible.
70 if ($self->can ('preserve_whitespace')) {
71 $self->preserve_whitespace (1);
73 $self->fullstop_space_harden (1);
76 # The =for and =begin targets that we accept.
77 $self->accept_targets (qw/text TEXT/);
79 # Ensure that contiguous blocks of code are merged together. Otherwise,
80 # some of the guesswork heuristics don't work right.
81 $self->merge_text (1);
83 # Pod::Simple doesn't do anything useful with our arguments, but we want
84 # to put them in our object as hash keys and values. This could cause
85 # problems if we ever clash with Pod::Simple's own internal class
88 my @opts = map { ("opt_$_", $opts{$_}) } keys %opts;
89 %$self = (%$self, @opts);
91 # Initialize various things from our parameters.
92 $$self{opt_alt} = 0 unless defined $$self{opt_alt};
93 $$self{opt_indent} = 4 unless defined $$self{opt_indent};
94 $$self{opt_margin} = 0 unless defined $$self{opt_margin};
95 $$self{opt_loose} = 0 unless defined $$self{opt_loose};
96 $$self{opt_sentence} = 0 unless defined $$self{opt_sentence};
97 $$self{opt_width} = 76 unless defined $$self{opt_width};
99 # Figure out what quotes we'll be using for C<> text.
100 $$self{opt_quotes} ||= '"';
101 if ($$self{opt_quotes} eq 'none') {
102 $$self{LQUOTE} = $$self{RQUOTE} = '';
103 } elsif (length ($$self{opt_quotes}) == 1) {
104 $$self{LQUOTE} = $$self{RQUOTE} = $$self{opt_quotes};
105 } elsif ($$self{opt_quotes} =~ /^(.)(.)$/
106 || $$self{opt_quotes} =~ /^(..)(..)$/) {
110 croak qq(Invalid quote specification "$$self{opt_quotes}");
113 # If requested, do something with the non-POD text.
114 $self->code_handler (\&handle_code) if $$self{opt_code};
116 # Return the created object.
120 ##############################################################################
122 ##############################################################################
124 # This is the glue that connects the code below with Pod::Simple itself. The
125 # goal is to convert the event stream coming from the POD parser into method
126 # calls to handlers once the complete content of a tag has been seen. Each
127 # paragraph or POD command will have textual content associated with it, and
128 # as soon as all of a paragraph or POD command has been seen, that content
129 # will be passed in to the corresponding method for handling that type of
130 # object. The exceptions are handlers for lists, which have opening tag
131 # handlers and closing tag handlers that will be called right away.
133 # The internal hash key PENDING is used to store the contents of a tag until
134 # all of it has been seen. It holds a stack of open tags, each one
135 # represented by a tuple of the attributes hash for the tag and the contents
138 # Add a block of text to the contents of the current node, formatting it
139 # according to the current formatting instructions as we do.
141 my ($self, $text) = @_;
142 my $tag = $$self{PENDING}[-1];
146 # Given an element name, get the corresponding method name.
147 sub method_for_element {
148 my ($self, $element) = @_;
150 $element =~ tr/A-Z/a-z/;
151 $element =~ tr/_a-z0-9//cd;
155 # Handle the start of a new element. If cmd_element is defined, assume that
156 # we need to collect the entire tree for this element before passing it to the
157 # element method, and create a new tree into which we'll collect blocks of
158 # text and nested elements. Otherwise, if start_element is defined, call it.
159 sub _handle_element_start {
160 my ($self, $element, $attrs) = @_;
161 my $method = $self->method_for_element ($element);
163 # If we have a command handler, we need to accumulate the contents of the
164 # tag before calling it.
165 if ($self->can ("cmd_$method")) {
166 push (@{ $$self{PENDING} }, [ $attrs, '' ]);
167 } elsif ($self->can ("start_$method")) {
168 my $method = 'start_' . $method;
169 $self->$method ($attrs, '');
173 # Handle the end of an element. If we had a cmd_ method for this element,
174 # this is where we pass along the text that we've accumulated. Otherwise, if
175 # we have an end_ method for the element, call that.
176 sub _handle_element_end {
177 my ($self, $element) = @_;
178 my $method = $self->method_for_element ($element);
180 # If we have a command handler, pull off the pending text and pass it to
181 # the handler along with the saved attribute hash.
182 if ($self->can ("cmd_$method")) {
183 my $tag = pop @{ $$self{PENDING} };
184 my $method = 'cmd_' . $method;
185 my $text = $self->$method (@$tag);
187 if (@{ $$self{PENDING} } > 1) {
188 $$self{PENDING}[-1][1] .= $text;
190 $self->output ($text);
193 } elsif ($self->can ("end_$method")) {
194 my $method = 'end_' . $method;
199 ##############################################################################
201 ##############################################################################
203 # Wrap a line, indenting by the current left margin. We can't use Text::Wrap
204 # because it plays games with tabs. We can't use formline, even though we'd
205 # really like to, because it screws up non-printing characters. So we have to
206 # do the wrapping ourselves.
211 my $spaces = ' ' x $$self{MARGIN};
212 my $width = $$self{opt_width} - $$self{MARGIN};
213 while (length > $width) {
214 if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
215 $output .= $spaces . $1 . "\n";
220 $output .= $spaces . $_;
221 $output =~ s/\s+$/\n\n/;
225 # Reformat a paragraph of text for the current margin. Takes the text to
226 # reformat and returns the formatted text.
231 # If we're trying to preserve two spaces after sentences, do some munging
232 # to support that. Otherwise, smash all repeated whitespace.
233 if ($$self{opt_sentence}) {
241 return $self->wrap ($_);
244 # Output text to the output device.
246 my ($self, $text) = @_;
247 $text =~ tr/\240\255/ /d;
248 print { $$self{output_fh} } $text;
# Emit a chunk of non-POD text (source code surrounding the POD).  The code
# handler registered when the code option is set funnels each such line here.
# This is a separate method purely so that subclasses have a hook to override;
# the default implementation hands the text straight to output.
sub output_code {
    my ($self, $code) = @_;
    return $self->output ($code);
}
256 ##############################################################################
257 # Document initialization
258 ##############################################################################
260 # Set up various things that have to be initialized on a per-document basis.
263 my $margin = $$self{opt_indent} + $$self{opt_margin};
265 # Initialize a few per-document variables.
266 $$self{INDENTS} = []; # Stack of indentations.
267 $$self{MARGIN} = $margin; # Default left margin.
268 $$self{PENDING} = [[]]; # Pending output.
273 ##############################################################################
275 ##############################################################################
277 # This method is called whenever an =item command is complete (in other words,
278 # we've seen its associated paragraph or know for certain that it doesn't have
279 # one). It gets the paragraph associated with the item as an argument. If
280 # that argument is empty, just output the item tag; if it contains a newline,
281 # output the item tag followed by the newline. Otherwise, see if there's
282 # enough room for us to output the item tag in the margin of the text or if we
283 # have to put it on a separate line.
285 my ($self, $text) = @_;
286 my $tag = $$self{ITEM};
287 unless (defined $tag) {
288 carp "Item called without tag";
293 # Calculate the indentation and margin. $fits is set to true if the tag
294 # will fit into the margin of the paragraph given our indentation level.
295 my $indent = $$self{INDENTS}[-1];
296 $indent = $$self{opt_indent} unless defined $indent;
297 my $margin = ' ' x $$self{opt_margin};
298 my $fits = ($$self{MARGIN} - $indent >= length ($tag) + 1);
300 # If the tag doesn't fit, or if we have no associated text, print out the
301 # tag separately. Otherwise, put the tag in the margin of the paragraph.
302 if (!$text || $text =~ /^\s+$/ || !$fits) {
303 my $realindent = $$self{MARGIN};
304 $$self{MARGIN} = $indent;
305 my $output = $self->reformat ($tag);
306 $output =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
307 $output =~ s/\n*$/\n/;
309 # If the text is just whitespace, we have an empty item paragraph;
310 # this can result from =over/=item/=back without any intermixed
311 # paragraphs. Insert some whitespace to keep the =item from merging
312 # into the next paragraph.
313 $output .= "\n" if $text && $text =~ /^\s*$/;
315 $self->output ($output);
316 $$self{MARGIN} = $realindent;
317 $self->output ($self->reformat ($text)) if ($text && $text =~ /\S/);
319 my $space = ' ' x $indent;
320 $space =~ s/^$margin /$margin:/ if $$self{opt_alt};
321 $text = $self->reformat ($text);
322 $text =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
323 my $tagspace = ' ' x length $tag;
324 $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
325 $self->output ($text);
329 # Handle a basic block of text. The only tricky thing here is that if there
330 # is a pending item tag, we need to format this as an item paragraph.
332 my ($self, $attrs, $text) = @_;
334 if (defined $$self{ITEM}) {
335 $self->item ($text . "\n");
337 $self->output ($self->reformat ($text . "\n"));
342 # Handle a verbatim paragraph. Just print it out, but indent it according to
345 my ($self, $attrs, $text) = @_;
346 $self->item if defined $$self{ITEM};
347 return if $text =~ /^\s*$/;
348 $text =~ s/^(\n*)(\s*\S+)/$1 . (' ' x $$self{MARGIN}) . $2/gme;
349 $text =~ s/\s*$/\n\n/;
350 $self->output ($text);
354 # Handle literal text (produced by =for and similar constructs). Just output
355 # it with the minimum of changes.
357 my ($self, $attrs, $text) = @_;
359 $text =~ s/\n{0,2}$/\n/;
360 $self->output ($text);
364 ##############################################################################
366 ##############################################################################
368 # The common code for handling all headers. Takes the header text, the
369 # indentation, and the surrounding marker for the alt formatting method.
371 my ($self, $text, $indent, $marker) = @_;
372 $self->item ("\n\n") if defined $$self{ITEM};
374 if ($$self{opt_alt}) {
375 my $closemark = reverse (split (//, $marker));
376 my $margin = ' ' x $$self{opt_margin};
377 $self->output ("\n" . "$margin$marker $text $closemark" . "\n\n");
379 $text .= "\n" if $$self{opt_loose};
380 my $margin = ' ' x ($$self{opt_margin} + $indent);
381 $self->output ($margin . $text . "\n");
386 # First level heading.
388 my ($self, $attrs, $text) = @_;
389 $self->heading ($text, 0, '====');
392 # Second level heading.
394 my ($self, $attrs, $text) = @_;
395 $self->heading ($text, $$self{opt_indent} / 2, '== ');
398 # Third level heading.
400 my ($self, $attrs, $text) = @_;
401 $self->heading ($text, $$self{opt_indent} * 2 / 3 + 0.5, '= ');
404 # Fourth level heading.
406 my ($self, $attrs, $text) = @_;
407 $self->heading ($text, $$self{opt_indent} * 3 / 4 + 0.5, '- ');
410 ##############################################################################
412 ##############################################################################
414 # Handle the beginning of an =over block. Takes only the attr hash for the
415 # block as its argument. This is called by the start handlers for
416 # the four different types of lists (bullet, number, text, and block).
417 sub over_common_start {
418 my ($self, $attrs) = @_;
419 $self->item ("\n\n") if defined $$self{ITEM};
421 # Find the indentation level.
422 my $indent = $$attrs{indent};
423 unless (defined ($indent) && $indent =~ /^\s*[-+]?\d{1,4}\s*$/) {
424 $indent = $$self{opt_indent};
427 # Add this to our stack of indents and increase our current margin.
428 push (@{ $$self{INDENTS} }, $$self{MARGIN});
429 $$self{MARGIN} += ($indent + 0);
433 # End an =over block. Takes no options other than the class pointer. Output
434 # any pending items and then pop one level of indentation.
435 sub over_common_end {
437 $self->item ("\n\n") if defined $$self{ITEM};
438 $$self{MARGIN} = pop @{ $$self{INDENTS} };
# Every flavor of =over block (bullet, number, text, block) is formatted the
# same way, so each start handler forwards its attribute hash to
# over_common_start and each end handler simply calls over_common_end.
sub start_over_bullet {
    my ($self, $attrs) = @_;
    return $self->over_common_start ($attrs);
}

sub start_over_number {
    my ($self, $attrs) = @_;
    return $self->over_common_start ($attrs);
}

sub start_over_text {
    my ($self, $attrs) = @_;
    return $self->over_common_start ($attrs);
}

sub start_over_block {
    my ($self, $attrs) = @_;
    return $self->over_common_start ($attrs);
}

sub end_over_bullet {
    my ($self) = @_;
    return $self->over_common_end;
}

sub end_over_number {
    my ($self) = @_;
    return $self->over_common_end;
}

sub end_over_text {
    my ($self) = @_;
    return $self->over_common_end;
}

sub end_over_block {
    my ($self) = @_;
    return $self->over_common_end;
}
452 # The common handler for all item commands. Takes the type of the item, the
453 # attributes, and then the text of the item.
455 my ($self, $type, $attrs, $text) = @_;
456 $self->item if defined $$self{ITEM};
458 # Clean up the text. We want to end up with two variables, one ($text)
459 # which contains any body text after taking out the item portion, and
460 # another ($item) which contains the actual item text. Note the use of
461 # the internal Pod::Simple attribute here; that's a potential land mine.
464 if ($type eq 'bullet') {
466 } elsif ($type eq 'number') {
467 $item = $$attrs{'~orig_content'};
470 $item =~ s/\s*\n\s*/ /g;
473 $$self{ITEM} = $item;
475 # If body text for this item was included, go ahead and output that now.
# The per-type =item handlers.  Each one just tags the call with the kind of
# list item it represents and lets item_common do the real work, passing along
# whatever arguments (attribute hash and item text) it was given.
sub cmd_item_bullet {
    my ($self, @rest) = @_;
    return $self->item_common ('bullet', @rest);
}

sub cmd_item_number {
    my ($self, @rest) = @_;
    return $self->item_common ('number', @rest);
}

sub cmd_item_text {
    my ($self, @rest) = @_;
    return $self->item_common ('text', @rest);
}

sub cmd_item_block {
    my ($self, @rest) = @_;
    return $self->item_common ('block', @rest);
}
489 ##############################################################################
491 ##############################################################################
# Format B<> (bold) text.  Arguments are the formatter object, the attribute
# hash for the tag (unused), and the already-formatted contents of the tag.
# In the alternate output style the text is wrapped in ``...''; otherwise it
# is returned unchanged.  Constructor options are stored on the object with an
# opt_ prefix, so the alt option has to be read as opt_alt; a bare "alt" key
# is never set and would leave the alternate style permanently disabled.
sub cmd_b {
    my ($self, $attrs, $text) = @_;
    return $$self{opt_alt} ? "``$text''" : $text;
}
# Format F<> (file name) text.  Arguments are the formatter object, the
# attribute hash for the tag (unused), and the already-formatted contents of
# the tag.  In the alternate output style the text is wrapped in double
# quotes; otherwise it is returned unchanged.  Constructor options are stored
# on the object with an opt_ prefix, so the alt option has to be read as
# opt_alt; a bare "alt" key is never set.
sub cmd_f {
    my ($self, $attrs, $text) = @_;
    return $$self{opt_alt} ? "\"$text\"" : $text;
}
# Format I<> (italic) text by surrounding the tag contents with asterisks.
# The object and attribute hash arguments are not consulted.
sub cmd_i {
    my (undef, undef, $text) = @_;
    return "*$text*";
}
# Format X<> (index entry) text.  Index entries produce no visible output, so
# this always yields the empty string regardless of its arguments.
sub cmd_x {
    return q{};
}
499 # Apply a whole bunch of messy heuristics to not quote things that don't
500 # benefit from being quoted. These originally come from Barrie Slaymaker and
501 # largely duplicate code in Pod::Man.
503 my ($self, $attrs, $text) = @_;
505 # A regex that matches the portion of a variable reference that's the
506 # array or hash index, separated out just because we want to use it in
507 # several places in the following regex.
508 my $index = '(?: \[.*\] | \{.*\} )?';
510 # Check for things that we don't want to quote, and if we find any of
511 # them, return the string with just a font change and no quoting.
515 ( [\'\`\"] ) .* \1 # already quoted
516 | \` .* \' # `quoted'
517 | \$+ [\#^]? \S $index # special ($^Foo, $")
518 | [\$\@%&*]+ \#? [:\'\w]+ $index # plain var or func
519 | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
520 | [+-]? ( \d[\d.]* | \.\d+ ) (?: [eE][+-]?\d+ )? # a number
521 | 0x [a-fA-F\d]+ # a hex constant
526 # If we didn't return, go ahead and quote the text.
527 return $$self{opt_alt}
529 : "$$self{LQUOTE}$text$$self{RQUOTE}";
532 # Links reduce to the text that we're given, wrapped in angle brackets if it's
535 my ($self, $attrs, $text) = @_;
536 return $$attrs{type} eq 'url' ? "<$text>" : $text;
539 ##############################################################################
540 # Backwards compatibility
541 ##############################################################################
543 # The old Pod::Text module did everything in a pod2text() function. This
544 # tries to provide the same interface for legacy applications.
548 # This is really ugly; I hate doing option parsing in the middle of a
549 # module. But the old Pod::Text module supported passing flags to its
550 # entry function, so handle -a and -<number>.
551 while ($_[0] =~ /^-/) {
553 if ($flag eq '-a') { push (@args, alt => 1) }
554 elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
561 # Now that we know what arguments we're using, create the parser.
562 my $parser = Pod::Text->new (@args);
564 # If two arguments were given, the second argument is going to be a file
565 # handle. That means we want to call parse_from_filehandle(), which means
566 # we need to turn the first argument into a file handle. Magic open will
567 # handle the <&STDIN case automagically.
571 unless (open (IN, $fhs[0])) {
572 croak ("Can't open $fhs[0] for reading: $!\n");
576 return $parser->parse_file (@fhs);
578 return $parser->parse_file (@_);
582 ##############################################################################
583 # Module return value and documentation
584 ##############################################################################
591 Pod::Text - Convert POD data to formatted ASCII text
596 my $parser = Pod::Text->new (sentence => 0, width => 78);
598 # Read POD from STDIN and write to STDOUT.
599 $parser->parse_from_filehandle;
601 # Read POD from file.pod and write to file.txt.
602 $parser->parse_from_file ('file.pod', 'file.txt');
606 Pod::Text is a module that can convert documentation in the POD format (the
607 preferred language for documenting Perl) into formatted ASCII. It uses no
608 special formatting controls or codes whatsoever, and its output is therefore
609 suitable for nearly any device.
611 As a derived class from Pod::Simple, Pod::Text supports the same methods and
612 interfaces. See L<Pod::Simple> for all the details; briefly, one creates a
613 new parser with C<< Pod::Text->new() >> and then normally calls parse_file().
615 new() can take options, in the form of key/value pairs, that control the
616 behavior of the parser. The currently recognized options are:
622 If set to a true value, selects an alternate output format that, among other
623 things, uses a different heading style and marks C<=item> entries with a
624 colon in the left margin. Defaults to false.
628 If set to a true value, the non-POD parts of the input file will be included
629 in the output. Useful for viewing code documented with POD blocks with the
630 POD rendered and the code left intact.
634 The number of spaces to indent regular text, and the default indentation for
635 C<=over> blocks. Defaults to 4.
639 If set to a true value, a blank line is printed after a C<=head1> heading.
640 If set to false (the default), no blank line is printed after C<=head1>,
641 although one is still printed after C<=head2>. This is the default because
642 it's the expected formatting for manual pages; if you're formatting
643 arbitrary text documents, setting this to true may result in more pleasing
648 The width of the left margin in spaces. Defaults to 0. This is the margin
649 for all text, including headings, not the amount by which regular text is
650 indented; for the latter, see the I<indent> option. To set the right
651 margin, see the I<width> option.
655 Sets the quote marks used to surround CE<lt>> text. If the value is a
656 single character, it is used as both the left and right quote; if it is two
657 characters, the first character is used as the left quote and the second as
658 the right quote; and if it is four characters, the first two are used as
659 the left quote and the second two as the right quote.
661 This may also be set to the special value C<none>, in which case no quote
662 marks are added around CE<lt>> text.
666 If set to a true value, Pod::Text will assume that each sentence ends in two
667 spaces, and will try to preserve that spacing. If set to false, all
668 consecutive whitespace in non-verbatim paragraphs is compressed into a
669 single space. Defaults to false.
673 The column at which to wrap text on the right-hand side. Defaults to 76.
677 The standard Pod::Simple method parse_file() takes one argument, the file or
678 file handle to read from, and writes output to standard output unless that
679 has been changed with the output_fh() method. See L<Pod::Simple> for the
680 specific details and for other alternative interfaces.
686 =item Bizarre space in item
688 =item Item called without tag
690 (W) Something has gone wrong in internal C<=item> processing. These
691 messages indicate a bug in Pod::Text; you should never see them.
693 =item Can't open %s for reading: %s
695 (F) Pod::Text was invoked via the compatibility mode pod2text() interface
696 and the input file it was given could not be opened.
698 =item Invalid quote specification "%s"
700 (F) The quote specification given (the quotes option to the constructor) was
701 invalid. A quote specification must be one, two, or four characters long.
707 This is a replacement for an earlier Pod::Text module written by Tom
708 Christiansen. It has a revamped interface, since it now uses Pod::Simple,
709 but an interface roughly compatible with the old Pod::Text::pod2text()
710 function is still available. Please change to the new calling convention,
713 The original Pod::Text contained code to do formatting via termcap
714 sequences, although it wasn't turned on by default and it was problematic to
715 get it to work at all. This rewrite doesn't even try to do that, but a
716 subclass of it does. Look for L<Pod::Text::Termcap>.
720 L<Pod::Simple>, L<Pod::Text::Termcap>, L<pod2text(1)>
722 The current version of this module is always available from its web site at
723 L<http://www.eyrie.org/~eagle/software/podlators/>. It is also part of the
724 Perl core distribution as of 5.6.0.
728 Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
729 Pod::Text by Tom Christiansen <tchrist@mox.perl.com> and its conversion to
730 Pod::Parser by Brad Appleton <bradapp@enteract.com>. Sean Burke's initial
731 conversion of Pod::Man to use Pod::Simple provided much-needed guidance on
732 how to use Pod::Simple.
734 =head1 COPYRIGHT AND LICENSE
736 Copyright 1999, 2000, 2001, 2002, 2004 by Russ Allbery <rra@stanford.edu>.
738 This program is free software; you may redistribute it and/or modify it
739 under the same terms as Perl itself.