# Pod::Text -- Convert POD data to formatted ASCII text.
-# $Id: Text.pm,v 2.19 2002/06/23 19:16:21 eagle Exp $
#
-# Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>
+# Copyright 1999, 2000, 2001, 2002, 2004, 2006, 2008
+# Russ Allbery <rra@stanford.edu>
#
# This program is free software; you may redistribute it and/or modify it
# under the same terms as Perl itself.
require 5.004;
-use Carp qw(carp croak);
-use Exporter ();
-use Pod::ParseLink qw(parselink);
-use Pod::Select ();
-
use strict;
use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
-# We inherit from Pod::Select instead of Pod::Parser so that we can be used by
-# Pod::Usage.
-@ISA = qw(Pod::Select Exporter);
+use Carp qw(carp croak);
+use Exporter ();
+use Pod::Simple ();
+
+@ISA = qw(Pod::Simple Exporter);
# We have to export pod2text for backward compatibility.
@EXPORT = qw(pod2text);
-# Don't use the CVS revision as the version, since this module is also in Perl
-# core and too many things could munge CVS magic revision strings. This
-# number should ideally be the same as the CVS revision in podlators, however.
-$VERSION = 2.19;
-
+$VERSION = 3.11;
##############################################################################
-# Table of supported E<> escapes
+# Initialization
##############################################################################
-# This table is taken near verbatim from Pod::PlainText in Pod::Parser, which
-# got it near verbatim from the original Pod::Text. It is therefore credited
-# to Tom Christiansen, and I'm glad I didn't have to write it. :) "iexcl" to
-# "divide" added by Tim Jenness.
-%ESCAPES = (
- 'amp' => '&', # ampersand
- 'apos' => "'", # apostrophe
- 'lt' => '<', # left chevron, less-than
- 'gt' => '>', # right chevron, greater-than
- 'quot' => '"', # double quote
- 'sol' => '/', # solidus (forward slash)
- 'verbar' => '|', # vertical bar
-
- "Aacute" => "\xC1", # capital A, acute accent
- "aacute" => "\xE1", # small a, acute accent
- "Acirc" => "\xC2", # capital A, circumflex accent
- "acirc" => "\xE2", # small a, circumflex accent
- "AElig" => "\xC6", # capital AE diphthong (ligature)
- "aelig" => "\xE6", # small ae diphthong (ligature)
- "Agrave" => "\xC0", # capital A, grave accent
- "agrave" => "\xE0", # small a, grave accent
- "Aring" => "\xC5", # capital A, ring
- "aring" => "\xE5", # small a, ring
- "Atilde" => "\xC3", # capital A, tilde
- "atilde" => "\xE3", # small a, tilde
- "Auml" => "\xC4", # capital A, dieresis or umlaut mark
- "auml" => "\xE4", # small a, dieresis or umlaut mark
- "Ccedil" => "\xC7", # capital C, cedilla
- "ccedil" => "\xE7", # small c, cedilla
- "Eacute" => "\xC9", # capital E, acute accent
- "eacute" => "\xE9", # small e, acute accent
- "Ecirc" => "\xCA", # capital E, circumflex accent
- "ecirc" => "\xEA", # small e, circumflex accent
- "Egrave" => "\xC8", # capital E, grave accent
- "egrave" => "\xE8", # small e, grave accent
- "ETH" => "\xD0", # capital Eth, Icelandic
- "eth" => "\xF0", # small eth, Icelandic
- "Euml" => "\xCB", # capital E, dieresis or umlaut mark
- "euml" => "\xEB", # small e, dieresis or umlaut mark
- "Iacute" => "\xCD", # capital I, acute accent
- "iacute" => "\xED", # small i, acute accent
- "Icirc" => "\xCE", # capital I, circumflex accent
- "icirc" => "\xEE", # small i, circumflex accent
- "Igrave" => "\xCC", # capital I, grave accent
- "igrave" => "\xEC", # small i, grave accent
- "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
- "iuml" => "\xEF", # small i, dieresis or umlaut mark
- "Ntilde" => "\xD1", # capital N, tilde
- "ntilde" => "\xF1", # small n, tilde
- "Oacute" => "\xD3", # capital O, acute accent
- "oacute" => "\xF3", # small o, acute accent
- "Ocirc" => "\xD4", # capital O, circumflex accent
- "ocirc" => "\xF4", # small o, circumflex accent
- "Ograve" => "\xD2", # capital O, grave accent
- "ograve" => "\xF2", # small o, grave accent
- "Oslash" => "\xD8", # capital O, slash
- "oslash" => "\xF8", # small o, slash
- "Otilde" => "\xD5", # capital O, tilde
- "otilde" => "\xF5", # small o, tilde
- "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
- "ouml" => "\xF6", # small o, dieresis or umlaut mark
- "szlig" => "\xDF", # small sharp s, German (sz ligature)
- "THORN" => "\xDE", # capital THORN, Icelandic
- "thorn" => "\xFE", # small thorn, Icelandic
- "Uacute" => "\xDA", # capital U, acute accent
- "uacute" => "\xFA", # small u, acute accent
- "Ucirc" => "\xDB", # capital U, circumflex accent
- "ucirc" => "\xFB", # small u, circumflex accent
- "Ugrave" => "\xD9", # capital U, grave accent
- "ugrave" => "\xF9", # small u, grave accent
- "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
- "uuml" => "\xFC", # small u, dieresis or umlaut mark
- "Yacute" => "\xDD", # capital Y, acute accent
- "yacute" => "\xFD", # small y, acute accent
- "yuml" => "\xFF", # small y, dieresis or umlaut mark
-
- "laquo" => "\xAB", # left pointing double angle quotation mark
- "lchevron" => "\xAB", # synonym (backwards compatibility)
- "raquo" => "\xBB", # right pointing double angle quotation mark
- "rchevron" => "\xBB", # synonym (backwards compatibility)
-
- "iexcl" => "\xA1", # inverted exclamation mark
- "cent" => "\xA2", # cent sign
- "pound" => "\xA3", # (UK) pound sign
- "curren" => "\xA4", # currency sign
- "yen" => "\xA5", # yen sign
- "brvbar" => "\xA6", # broken vertical bar
- "sect" => "\xA7", # section sign
- "uml" => "\xA8", # diaresis
- "copy" => "\xA9", # Copyright symbol
- "ordf" => "\xAA", # feminine ordinal indicator
- "not" => "\xAC", # not sign
- "shy" => '', # soft (discretionary) hyphen
- "reg" => "\xAE", # registered trademark
- "macr" => "\xAF", # macron, overline
- "deg" => "\xB0", # degree sign
- "plusmn" => "\xB1", # plus-minus sign
- "sup2" => "\xB2", # superscript 2
- "sup3" => "\xB3", # superscript 3
- "acute" => "\xB4", # acute accent
- "micro" => "\xB5", # micro sign
- "para" => "\xB6", # pilcrow sign = paragraph sign
- "middot" => "\xB7", # middle dot = Georgian comma
- "cedil" => "\xB8", # cedilla
- "sup1" => "\xB9", # superscript 1
- "ordm" => "\xBA", # masculine ordinal indicator
- "frac14" => "\xBC", # vulgar fraction one quarter
- "frac12" => "\xBD", # vulgar fraction one half
- "frac34" => "\xBE", # vulgar fraction three quarters
- "iquest" => "\xBF", # inverted question mark
- "times" => "\xD7", # multiplication sign
- "divide" => "\xF7", # division sign
-
- "nbsp" => "\x01", # non-breaking space
-);
-
+# This function handles code blocks. It's registered as a callback to
+# Pod::Simple and therefore doesn't work as a regular method call, but all it
+# does is call output_code with the line.
+sub handle_code {
+ my ($line, $number, $parser) = @_;
+ $parser->output_code ($line . "\n");
+}
-##############################################################################
-# Initialization
-##############################################################################
+# Initialize the object and set various Pod::Simple options that we need.
+# Here, we also process any additional options passed to the constructor or
+# set up defaults if none were given. Note that all internal object keys are
+# in all-caps, reserving all lower-case object keys for Pod::Simple and user
+# arguments.
+sub new {
+ my $class = shift;
+ my $self = $class->SUPER::new;
+
+ # Tell Pod::Simple to handle S<> by automatically inserting &nbsp;.
+ $self->nbsp_for_S (1);
+
+ # Tell Pod::Simple to keep whitespace whenever possible.
+ if ($self->can ('preserve_whitespace')) {
+ $self->preserve_whitespace (1);
+ } else {
+ $self->fullstop_space_harden (1);
+ }
-# Initialize the object. Must be sure to call our parent initializer.
-sub initialize {
- my $self = shift;
+ # The =for and =begin targets that we accept.
+ $self->accept_targets (qw/text TEXT/);
+
+ # Ensure that contiguous blocks of code are merged together. Otherwise,
+ # some of the guesswork heuristics don't work right.
+ $self->merge_text (1);
+
+ # Pod::Simple doesn't do anything useful with our arguments, but we want
+ # to put them in our object as hash keys and values. This could cause
+ # problems if we ever clash with Pod::Simple's own internal class
+ # variables.
+ my %opts = @_;
+ my @opts = map { ("opt_$_", $opts{$_}) } keys %opts;
+ %$self = (%$self, @opts);
+
+ # Send errors to stderr if requested.
+ if ($$self{opt_stderr}) {
+ $self->no_errata_section (1);
+ $self->complain_stderr (1);
+ delete $$self{opt_stderr};
+ }
- $$self{alt} = 0 unless defined $$self{alt};
- $$self{indent} = 4 unless defined $$self{indent};
- $$self{loose} = 0 unless defined $$self{loose};
- $$self{sentence} = 0 unless defined $$self{sentence};
- $$self{width} = 76 unless defined $$self{width};
+ # Initialize various things from our parameters.
+ $$self{opt_alt} = 0 unless defined $$self{opt_alt};
+ $$self{opt_indent} = 4 unless defined $$self{opt_indent};
+ $$self{opt_margin} = 0 unless defined $$self{opt_margin};
+ $$self{opt_loose} = 0 unless defined $$self{opt_loose};
+ $$self{opt_sentence} = 0 unless defined $$self{opt_sentence};
+ $$self{opt_width} = 76 unless defined $$self{opt_width};
# Figure out what quotes we'll be using for C<> text.
- $$self{quotes} ||= '"';
- if ($$self{quotes} eq 'none') {
+ $$self{opt_quotes} ||= '"';
+ if ($$self{opt_quotes} eq 'none') {
$$self{LQUOTE} = $$self{RQUOTE} = '';
- } elsif (length ($$self{quotes}) == 1) {
- $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes};
- } elsif ($$self{quotes} =~ /^(.)(.)$/
- || $$self{quotes} =~ /^(..)(..)$/) {
+ } elsif (length ($$self{opt_quotes}) == 1) {
+ $$self{LQUOTE} = $$self{RQUOTE} = $$self{opt_quotes};
+ } elsif ($$self{opt_quotes} =~ /^(.)(.)$/
+ || $$self{opt_quotes} =~ /^(..)(..)$/) {
$$self{LQUOTE} = $1;
$$self{RQUOTE} = $2;
} else {
- croak qq(Invalid quote specification "$$self{quotes}");
+ croak qq(Invalid quote specification "$$self{opt_quotes}");
}
- $$self{INDENTS} = []; # Stack of indentations.
- $$self{MARGIN} = $$self{indent}; # Current left margin in spaces.
+ # If requested, do something with the non-POD text.
+ $self->code_handler (\&handle_code) if $$self{opt_code};
- $self->SUPER::initialize;
-
- # Tell Pod::Parser that we want the non-POD stuff too if code was set.
- $self->parseopts ('-want_nonPODs' => 1) if $$self{code};
+ # Return the created object.
+ return $self;
}
-
##############################################################################
-# Core overrides
+# Core parsing
##############################################################################
-# Called for each command paragraph. Gets the command, the associated
-# paragraph, the line number, and a Pod::Paragraph object. Just dispatches
-# the command to a method named the same as the command. =cut is handled
-# internally by Pod::Parser.
-sub command {
- my $self = shift;
- my $command = shift;
- return if $command eq 'pod';
- return if ($$self{EXCLUDE} && $command ne 'end');
- if ($self->can ('cmd_' . $command)) {
- $command = 'cmd_' . $command;
- $self->$command (@_);
- } else {
- my ($text, $line, $paragraph) = @_;
- my $file;
- ($file, $line) = $paragraph->file_line;
- $text =~ s/\n+\z//;
- $text = " $text" if ($text =~ /^\S/);
- warn qq($file:$line: Unknown command paragraph: =$command$text\n);
- return;
+# This is the glue that connects the code below with Pod::Simple itself. The
+# goal is to convert the event stream coming from the POD parser into method
+# calls to handlers once the complete content of a tag has been seen. Each
+# paragraph or POD command will have textual content associated with it, and
+# as soon as all of a paragraph or POD command has been seen, that content
+# will be passed in to the corresponding method for handling that type of
+# object. The exceptions are handlers for lists, which have opening tag
+# handlers and closing tag handlers that will be called right away.
+#
+# The internal hash key PENDING is used to store the contents of a tag until
+# all of it has been seen. It holds a stack of open tags, each one
+# represented by a tuple of the attributes hash for the tag and the contents
+# of the tag.
+
+# Add a block of text to the contents of the current node, formatting it
+# according to the current formatting instructions as we do.
+sub _handle_text {
+ my ($self, $text) = @_;
+ my $tag = $$self{PENDING}[-1];
+ $$tag[1] .= $text;
+}
+
+# Given an element name, get the corresponding method name.
+sub method_for_element {
+ my ($self, $element) = @_;
+ $element =~ tr/-/_/;
+ $element =~ tr/A-Z/a-z/;
+ $element =~ tr/_a-z0-9//cd;
+ return $element;
+}
+
+# Handle the start of a new element. If cmd_element is defined, assume that
+# we need to collect the entire tree for this element before passing it to the
+# element method, and create a new tree into which we'll collect blocks of
+# text and nested elements. Otherwise, if start_element is defined, call it.
+sub _handle_element_start {
+ my ($self, $element, $attrs) = @_;
+ my $method = $self->method_for_element ($element);
+
+ # If we have a command handler, we need to accumulate the contents of the
+ # tag before calling it.
+ if ($self->can ("cmd_$method")) {
+ push (@{ $$self{PENDING} }, [ $attrs, '' ]);
+ } elsif ($self->can ("start_$method")) {
+ my $method = 'start_' . $method;
+ $self->$method ($attrs, '');
+ }
+}
+
+# Handle the end of an element. If we had a cmd_ method for this element,
+# this is where we pass along the text that we've accumulated. Otherwise, if
+# we have an end_ method for the element, call that.
+sub _handle_element_end {
+ my ($self, $element) = @_;
+ my $method = $self->method_for_element ($element);
+
+ # If we have a command handler, pull off the pending text and pass it to
+ # the handler along with the saved attribute hash.
+ if ($self->can ("cmd_$method")) {
+ my $tag = pop @{ $$self{PENDING} };
+ my $method = 'cmd_' . $method;
+ my $text = $self->$method (@$tag);
+ if (defined $text) {
+ if (@{ $$self{PENDING} } > 1) {
+ $$self{PENDING}[-1][1] .= $text;
+ } else {
+ $self->output ($text);
+ }
+ }
+ } elsif ($self->can ("end_$method")) {
+ my $method = 'end_' . $method;
+ $self->$method ();
}
}
-# Called for a verbatim paragraph. Gets the paragraph, the line number, and a
-# Pod::Paragraph object. Just output it verbatim, but with tabs converted to
-# spaces.
-sub verbatim {
+##############################################################################
+# Output formatting
+##############################################################################
+
+# Wrap a line, indenting by the current left margin. We can't use Text::Wrap
+# because it plays games with tabs. We can't use formline, even though we'd
+# really like to, because it screws up non-printing characters. So we have to
+# do the wrapping ourselves.
+sub wrap {
my $self = shift;
- return if $$self{EXCLUDE};
- $self->item if defined $$self{ITEM};
local $_ = shift;
- return if /^\s*$/;
- s/^(\s*\S+)/(' ' x $$self{MARGIN}) . $1/gme;
- $self->output ($_);
+ my $output = '';
+ my $spaces = ' ' x $$self{MARGIN};
+ my $width = $$self{opt_width} - $$self{MARGIN};
+ while (length > $width) {
+ if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
+ $output .= $spaces . $1 . "\n";
+ } else {
+ last;
+ }
+ }
+ $output .= $spaces . $_;
+ $output =~ s/\s+$/\n\n/;
+ return $output;
}
-# Called for a regular text block. Gets the paragraph, the line number, and a
-# Pod::Paragraph object. Perform interpolation and output the results.
-sub textblock {
+# Reformat a paragraph of text for the current margin. Takes the text to
+# reformat and returns the formatted text.
+sub reformat {
my $self = shift;
- return if $$self{EXCLUDE};
- $self->output ($_[0]), return if $$self{VERBATIM};
local $_ = shift;
- my $line = shift;
- # Interpolate and output the paragraph.
- $_ = $self->interpolate ($_, $line);
- s/\s+$/\n/;
- if (defined $$self{ITEM}) {
- $self->item ($_ . "\n");
+ # If we're trying to preserve two spaces after sentences, do some munging
+ # to support that. Otherwise, smash all repeated whitespace.
+ if ($$self{opt_sentence}) {
+ s/ +$//mg;
+ s/\.\n/. \n/g;
+ s/\n/ /g;
+ s/ +/ /g;
} else {
- $self->output ($self->reformat ($_ . "\n"));
+ s/\s+/ /g;
}
+ return $self->wrap ($_);
}
-# Called for a formatting code. Gets the command, argument, and a
-# Pod::InteriorSequence object and is expected to return the resulting text.
-# Calls methods for code, bold, italic, file, and link to handle those types
-# of codes, and handles S<>, E<>, X<>, and Z<> directly.
-sub interior_sequence {
- local $_;
- my ($self, $command, $seq);
- ($self, $command, $_, $seq) = @_;
-
- # We have to defer processing of the inside of an L<> formatting code. If
- # this code is nested inside an L<> code, return the literal raw text of
- # it.
- my $parent = $seq->nested;
- while (defined $parent) {
- return $seq->raw_text if ($parent->cmd_name eq 'L');
- $parent = $parent->nested;
- }
+# Output text to the output device. Replace non-breaking spaces with spaces
+# and soft hyphens with nothing.
+sub output {
+ my ($self, $text) = @_;
+ $text =~ tr/\240\255/ /d;
+ print { $$self{output_fh} } $text;
+}
+
+# Output a block of code (something that isn't part of the POD text). Called
+# by handle_code, which is only registered when we were given the code option.
+# Exists here only so that it can be overridden by subclasses.
+sub output_code { $_[0]->output ($_[1]) }
- # Index entries are ignored in plain text.
- return '' if ($command eq 'X' || $command eq 'Z');
+##############################################################################
+# Document initialization
+##############################################################################
- # Expand escapes into the actual character now, warning if invalid.
- if ($command eq 'E') {
- if (/^\d+$/) {
- return chr;
- } else {
- return $ESCAPES{$_} if defined $ESCAPES{$_};
- my ($file, $line) = $seq->file_line;
- warn "$file:$line: Unknown escape: E<$_>\n";
- return "E<$_>";
- }
+# Set up various things that have to be initialized on a per-document basis.
+sub start_document {
+ my $self = shift;
+ my $margin = $$self{opt_indent} + $$self{opt_margin};
+
+ # Initialize a few per-document variables.
+ $$self{INDENTS} = []; # Stack of indentations.
+ $$self{MARGIN} = $margin; # Default left margin.
+ $$self{PENDING} = [[]]; # Pending output.
+
+ return '';
+}
+
+##############################################################################
+# Text blocks
+##############################################################################
+
+# This method is called whenever an =item command is complete (in other words,
+# we've seen its associated paragraph or know for certain that it doesn't have
+# one). It gets the paragraph associated with the item as an argument. If
+# that argument is empty, just output the item tag; if it contains a newline,
+# output the item tag followed by the newline. Otherwise, see if there's
+# enough room for us to output the item tag in the margin of the text or if we
+# have to put it on a separate line.
+sub item {
+ my ($self, $text) = @_;
+ my $tag = $$self{ITEM};
+ unless (defined $tag) {
+ carp "Item called without tag";
+ return;
}
+ undef $$self{ITEM};
- # For all the other formatting codes, empty content produces no output.
- return if $_ eq '';
+ # Calculate the indentation and margin. $fits is set to true if the tag
+ # will fit into the margin of the paragraph given our indentation level.
+ my $indent = $$self{INDENTS}[-1];
+ $indent = $$self{opt_indent} unless defined $indent;
+ my $margin = ' ' x $$self{opt_margin};
+ my $fits = ($$self{MARGIN} - $indent >= length ($tag) + 1);
+
+ # If the tag doesn't fit, or if we have no associated text, print out the
+ # tag separately. Otherwise, put the tag in the margin of the paragraph.
+ if (!$text || $text =~ /^\s+$/ || !$fits) {
+ my $realindent = $$self{MARGIN};
+ $$self{MARGIN} = $indent;
+ my $output = $self->reformat ($tag);
+ $output =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
+ $output =~ s/\n*$/\n/;
- # For S<>, compress all internal whitespace and then map spaces to \01.
- # When we output the text, we'll map this back.
- if ($command eq 'S') {
- s/\s+/ /g;
- tr/ /\01/;
- return $_;
+ # If the text is just whitespace, we have an empty item paragraph;
+ # this can result from =over/=item/=back without any intermixed
+ # paragraphs. Insert some whitespace to keep the =item from merging
+ # into the next paragraph.
+ $output .= "\n" if $text && $text =~ /^\s*$/;
+
+ $self->output ($output);
+ $$self{MARGIN} = $realindent;
+ $self->output ($self->reformat ($text)) if ($text && $text =~ /\S/);
+ } else {
+ my $space = ' ' x $indent;
+ $space =~ s/^$margin /$margin:/ if $$self{opt_alt};
+ $text = $self->reformat ($text);
+ $text =~ s/^$margin /$margin:/ if ($$self{opt_alt} && $indent > 0);
+ my $tagspace = ' ' x length $tag;
+ $text =~ s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
+ $self->output ($text);
}
+}
- # Anything else needs to get dispatched to another method.
- if ($command eq 'B') { return $self->seq_b ($_) }
- elsif ($command eq 'C') { return $self->seq_c ($_) }
- elsif ($command eq 'F') { return $self->seq_f ($_) }
- elsif ($command eq 'I') { return $self->seq_i ($_) }
- elsif ($command eq 'L') { return $self->seq_l ($_, $seq) }
- else {
- my ($file, $line) = $seq->file_line;
- warn "$file:$line: Unknown formatting code: $command<$_>\n";
+# Handle a basic block of text. The only tricky thing here is that if there
+# is a pending item tag, we need to format this as an item paragraph.
+sub cmd_para {
+ my ($self, $attrs, $text) = @_;
+ $text =~ s/\s+$/\n/;
+ if (defined $$self{ITEM}) {
+ $self->item ($text . "\n");
+ } else {
+ $self->output ($self->reformat ($text . "\n"));
}
+ return '';
}
-# Called for each paragraph that's actually part of the POD. We take
-# advantage of this opportunity to untabify the input. Also, if given the
-# code option, we may see paragraphs that aren't part of the POD and need to
-# output them directly.
-sub preprocess_paragraph {
- my $self = shift;
- local $_ = shift;
- 1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
- $self->output_code ($_) if $self->cutting;
- $_;
+# Handle a verbatim paragraph. Just print it out, but indent it according to
+# our margin.
+sub cmd_verbatim {
+ my ($self, $attrs, $text) = @_;
+ $self->item if defined $$self{ITEM};
+ return if $text =~ /^\s*$/;
+ $text =~ s/^(\n*)(\s*\S+)/$1 . (' ' x $$self{MARGIN}) . $2/gme;
+ $text =~ s/\s*$/\n\n/;
+ $self->output ($text);
+ return '';
}
+# Handle literal text (produced by =for and similar constructs). Just output
+# it with the minimum of changes.
+sub cmd_data {
+ my ($self, $attrs, $text) = @_;
+ $text =~ s/^\n+//;
+ $text =~ s/\n{0,2}$/\n/;
+ $self->output ($text);
+ return '';
+}
##############################################################################
-# Command paragraphs
+# Headings
##############################################################################
-# All command paragraphs take the paragraph and the line number.
+# The common code for handling all headers. Takes the header text, the
+# indentation, and the surrounding marker for the alt formatting method.
+sub heading {
+ my ($self, $text, $indent, $marker) = @_;
+ $self->item ("\n\n") if defined $$self{ITEM};
+ $text =~ s/\s+$//;
+ if ($$self{opt_alt}) {
+ my $closemark = reverse (split (//, $marker));
+ my $margin = ' ' x $$self{opt_margin};
+ $self->output ("\n" . "$margin$marker $text $closemark" . "\n\n");
+ } else {
+ $text .= "\n" if $$self{opt_loose};
+ my $margin = ' ' x ($$self{opt_margin} + $indent);
+ $self->output ($margin . $text . "\n");
+ }
+ return '';
+}
# First level heading.
sub cmd_head1 {
- my ($self, $text, $line) = @_;
- $self->heading ($text, $line, 0, '====');
+ my ($self, $attrs, $text) = @_;
+ $self->heading ($text, 0, '====');
}
# Second level heading.
sub cmd_head2 {
- my ($self, $text, $line) = @_;
- $self->heading ($text, $line, $$self{indent} / 2, '== ');
+ my ($self, $attrs, $text) = @_;
+ $self->heading ($text, $$self{opt_indent} / 2, '== ');
}
# Third level heading.
sub cmd_head3 {
- my ($self, $text, $line) = @_;
- $self->heading ($text, $line, $$self{indent} * 2 / 3 + 0.5, '= ');
+ my ($self, $attrs, $text) = @_;
+ $self->heading ($text, $$self{opt_indent} * 2 / 3 + 0.5, '= ');
}
-# Third level heading.
+# Fourth level heading.
sub cmd_head4 {
- my ($self, $text, $line) = @_;
- $self->heading ($text, $line, $$self{indent} * 3 / 4 + 0.5, '- ');
+ my ($self, $attrs, $text) = @_;
+ $self->heading ($text, $$self{opt_indent} * 3 / 4 + 0.5, '- ');
}
-# Start a list.
-sub cmd_over {
- my $self = shift;
- local $_ = shift;
+##############################################################################
+# List handling
+##############################################################################
+
+# Handle the beginning of an =over block. Takes the attr hash for the block
+# as its only argument. This is called by the handlers for the four different
+# types of lists (bullet, number, text, and block).
+sub over_common_start {
+ my ($self, $attrs) = @_;
$self->item ("\n\n") if defined $$self{ITEM};
- unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
+
+ # Find the indentation level.
+ my $indent = $$attrs{indent};
+ unless (defined ($indent) && $indent =~ /^\s*[-+]?\d{1,4}\s*$/) {
+ $indent = $$self{opt_indent};
+ }
+
+ # Add this to our stack of indents and increase our current margin.
push (@{ $$self{INDENTS} }, $$self{MARGIN});
- $$self{MARGIN} += ($_ + 0);
+ $$self{MARGIN} += ($indent + 0);
+ return '';
}
-# End a list.
-sub cmd_back {
- my ($self, $text, $line, $paragraph) = @_;
+# End an =over block. Takes no options other than the class pointer. Output
+# any pending items and then pop one level of indentation.
+sub over_common_end {
+ my ($self) = @_;
$self->item ("\n\n") if defined $$self{ITEM};
$$self{MARGIN} = pop @{ $$self{INDENTS} };
- unless (defined $$self{MARGIN}) {
- my $file;
- ($file, $line) = $paragraph->file_line;
- warn "$file:$line: Unmatched =back\n";
- $$self{MARGIN} = $$self{indent};
- }
+ return '';
}
-# An individual list item.
-sub cmd_item {
- my $self = shift;
- if (defined $$self{ITEM}) { $self->item }
- local $_ = shift;
- s/\s+$//;
- $$self{ITEM} = $_ ? $self->interpolate ($_) : '*';
-}
+# Dispatch the start and end calls as appropriate.
+sub start_over_bullet { $_[0]->over_common_start ($_[1]) }
+sub start_over_number { $_[0]->over_common_start ($_[1]) }
+sub start_over_text { $_[0]->over_common_start ($_[1]) }
+sub start_over_block { $_[0]->over_common_start ($_[1]) }
+sub end_over_bullet { $_[0]->over_common_end }
+sub end_over_number { $_[0]->over_common_end }
+sub end_over_text { $_[0]->over_common_end }
+sub end_over_block { $_[0]->over_common_end }
+
+# The common handler for all item commands. Takes the type of the item, the
+# attributes, and then the text of the item.
+sub item_common {
+ my ($self, $type, $attrs, $text) = @_;
+ $self->item if defined $$self{ITEM};
-# Begin a block for a particular translator. Setting VERBATIM triggers
-# special handling in textblock().
-sub cmd_begin {
- my $self = shift;
- local $_ = shift;
- my ($kind) = /^(\S+)/ or return;
- if ($kind eq 'text') {
- $$self{VERBATIM} = 1;
+ # Clean up the text. We want to end up with two variables, one ($text)
+ # which contains any body text after taking out the item portion, and
+ # another ($item) which contains the actual item text. Note the use of
+ # the internal Pod::Simple attribute here; that's a potential land mine.
+ $text =~ s/\s+$//;
+ my ($item, $index);
+ if ($type eq 'bullet') {
+ $item = '*';
+ } elsif ($type eq 'number') {
+ $item = $$attrs{'~orig_content'};
} else {
- $$self{EXCLUDE} = 1;
+ $item = $text;
+ $item =~ s/\s*\n\s*/ /g;
+ $text = '';
}
-}
+ $$self{ITEM} = $item;
-# End a block for a particular translator. We assume that all =begin/=end
-# pairs are properly closed.
-sub cmd_end {
- my $self = shift;
- $$self{EXCLUDE} = 0;
- $$self{VERBATIM} = 0;
-}
-
-# One paragraph for a particular translator. Ignore it unless it's intended
-# for text, in which case we treat it as a verbatim text block.
-sub cmd_for {
- my $self = shift;
- local $_ = shift;
- my $line = shift;
- return unless s/^text\b[ \t]*\n?//;
- $self->verbatim ($_, $line);
+ # If body text for this item was included, go ahead and output that now.
+ if ($text) {
+ $text =~ s/\s*$/\n/;
+ $self->item ($text);
+ }
+ return '';
}
+# Dispatch the item commands to the appropriate place.
+sub cmd_item_bullet { my $self = shift; $self->item_common ('bullet', @_) }
+sub cmd_item_number { my $self = shift; $self->item_common ('number', @_) }
+sub cmd_item_text { my $self = shift; $self->item_common ('text', @_) }
+sub cmd_item_block { my $self = shift; $self->item_common ('block', @_) }
##############################################################################
# Formatting codes
##############################################################################
-# The simple ones. These are here mostly so that subclasses can override them
-# and do more complicated things.
-sub seq_b { return $_[0]{alt} ? "``$_[1]''" : $_[1] }
-sub seq_f { return $_[0]{alt} ? "\"$_[1]\"" : $_[1] }
-sub seq_i { return '*' . $_[1] . '*' }
+# The simple ones. Each handler receives the object, the attribute hash for
+# the formatting code, and the already-collected text of its contents, and
+# returns the text to splice into the enclosing element. B<> and F<> are
+# only decorated when the alt option is in effect; the constructor stores
+# that option under the opt_alt key, so that is the key tested here. I<> is
+# always wrapped in asterisks, and X<> (index entries) produces no output.
+sub cmd_b { return $_[0]{opt_alt} ? "``$_[2]''" : $_[2] }
+sub cmd_f { return $_[0]{opt_alt} ? "\"$_[2]\"" : $_[2] }
+sub cmd_i { return '*' . $_[2] . '*' }
+sub cmd_x { return '' }
# Apply a whole bunch of messy heuristics to not quote things that don't
# benefit from being quoted. These originally come from Barrie Slaymaker and
# largely duplicate code in Pod::Man.
-sub seq_c {
- my $self = shift;
- local $_ = shift;
+sub cmd_c {
+ my ($self, $attrs, $text) = @_;
# A regex that matches the portion of a variable reference that's the
# array or hash index, separated out just because we want to use it in
# Check for things that we don't want to quote, and if we find any of
# them, return the string with just a font change and no quoting.
- m{
+ $text =~ m{
^\s*
(?:
( [\'\`\"] ) .* \1 # already quoted
| 0x [a-fA-F\d]+ # a hex constant
)
\s*\z
- }xo && return $_;
+ }xo && return $text;
# If we didn't return, go ahead and quote the text.
- return $$self{alt} ? "``$_''" : "$$self{LQUOTE}$_$$self{RQUOTE}";
-}
-
-# Handle links. Since this is plain text, we can't actually make any real
-# links, so this is all to figure out what text we print out. Most of the
-# work is done by Pod::ParseLink.
-sub seq_l {
- my ($self, $link, $seq) = @_;
- my ($text, $type) = (parselink ($link))[1,4];
- my ($file, $line) = $seq->file_line;
- $text = $self->interpolate ($text, $line);
- $text = '<' . $text . '>' if $type eq 'url';
- return $text || '';
-}
-
-
-##############################################################################
-# Header handling
-##############################################################################
-
-# The common code for handling all headers. Takes the interpolated header
-# text, the line number, the indentation, and the surrounding marker for the
-# alt formatting method.
-sub heading {
- my ($self, $text, $line, $indent, $marker) = @_;
- $self->item ("\n\n") if defined $$self{ITEM};
- $text =~ s/\s+$//;
- $text = $self->interpolate ($text, $line);
- if ($$self{alt}) {
- my $closemark = reverse (split (//, $marker));
- $self->output ("\n" . "$marker $text $closemark" . "\n\n");
- } else {
- $text .= "\n" if $$self{loose};
- $self->output (' ' x $indent . $text . "\n");
- }
+ return $$self{opt_alt}
+ ? "``$text''"
+ : "$$self{LQUOTE}$text$$self{RQUOTE}";
}
-
-##############################################################################
-# List handling
-##############################################################################
-
-# This method is called whenever an =item command is complete (in other words,
-# we've seen its associated paragraph or know for certain that it doesn't have
-# one). It gets the paragraph associated with the item as an argument. If
-# that argument is empty, just output the item tag; if it contains a newline,
-# output the item tag followed by the newline. Otherwise, see if there's
-# enough room for us to output the item tag in the margin of the text or if we
-# have to put it on a separate line.
-sub item {
- my $self = shift;
- local $_ = shift;
- my $tag = $$self{ITEM};
- unless (defined $tag) {
- carp "Item called without tag";
- return;
- }
- undef $$self{ITEM};
- my $indent = $$self{INDENTS}[-1];
- unless (defined $indent) { $indent = $$self{indent} }
- my $space = ' ' x $indent;
- $space =~ s/^ /:/ if $$self{alt};
- if (!$_ || /^\s+$/ || ($$self{MARGIN} - $indent < length ($tag) + 1)) {
- my $margin = $$self{MARGIN};
- $$self{MARGIN} = $indent;
- my $output = $self->reformat ($tag);
- $output =~ s/\n*$/\n/;
-
- # If the text is just whitespace, we have an empty item paragraph;
- # this can result from =over/=item/=back without any intermixed
- # paragraphs. Insert some whitespace to keep the =item from merging
- # into the next paragraph.
- $output .= "\n" if $_ && $_ =~ /^\s*$/;
-
- $self->output ($output);
- $$self{MARGIN} = $margin;
- $self->output ($self->reformat ($_)) if $_ && /\S/;
- } else {
- $_ = $self->reformat ($_);
- s/^ /:/ if ($$self{alt} && $indent > 0);
- my $tagspace = ' ' x length $tag;
- s/^($space)$tagspace/$1$tag/ or warn "Bizarre space in item";
- $self->output ($_);
- }
+# Links reduce to the text that we're given, wrapped in angle brackets if the
+# type attribute of the link is 'url'; any other link text is returned as is.
+sub cmd_l {
+ my ($self, $attrs, $text) = @_;
+ return $$attrs{type} eq 'url' ? "<$text>" : $text;
}
-
-##############################################################################
-# Output formatting
-##############################################################################
-
-# Wrap a line, indenting by the current left margin. We can't use Text::Wrap
-# because it plays games with tabs. We can't use formline, even though we'd
-# really like to, because it screws up non-printing characters. So we have to
-# do the wrapping ourselves.
-sub wrap {
- my $self = shift;
- local $_ = shift;
- my $output = '';
- my $spaces = ' ' x $$self{MARGIN};
- my $width = $$self{width} - $$self{MARGIN};
- while (length > $width) {
- if (s/^([^\n]{0,$width})\s+// || s/^([^\n]{$width})//) {
- $output .= $spaces . $1 . "\n";
- } else {
- last;
- }
- }
- $output .= $spaces . $_;
- $output =~ s/\s+$/\n\n/;
- $output;
-}
-
-# Reformat a paragraph of text for the current margin. Takes the text to
-# reformat and returns the formatted text.
-sub reformat {
- my $self = shift;
- local $_ = shift;
-
- # If we're trying to preserve two spaces after sentences, do some munging
- # to support that. Otherwise, smash all repeated whitespace.
- if ($$self{sentence}) {
- s/ +$//mg;
- s/\.\n/. \n/g;
- s/\n/ /g;
- s/ +/ /g;
- } else {
- s/\s+/ /g;
- }
- $self->wrap ($_);
-}
-
-# Output text to the output device.
-sub output { $_[1] =~ tr/\01/ /; print { $_[0]->output_handle } $_[1] }
-
-# Output a block of code (something that isn't part of the POD text). Called
-# by preprocess_paragraph only if we were given the code option. Exists here
-# only so that it can be overridden by subclasses.
-sub output_code { $_[0]->output ($_[1]) }
-
-
##############################################################################
# Backwards compatibility
##############################################################################
return;
}
$fhs[0] = \*IN;
- return $parser->parse_from_filehandle (@fhs);
+ $parser->output_fh ($fhs[1]);
+ my $retval = $parser->parse_file ($fhs[0]);
+ my $fh = $parser->output_fh ();
+ close $fh;
+ return $retval;
} else {
- return $parser->parse_from_file (@_);
+ return $parser->parse_file (@_);
}
}
+# Reset the underlying Pod::Simple object (via reinit) before each parse so
+# that the same object can be reused to convert multiple pages.
+sub parse_from_file {
+ my $self = shift;
+ $self->reinit;
+
+ # Fake the old -cutting option to Pod::Parser when the first argument is an
+ # options hash. This fiddles with internal Pod::Simple state and is ugly.
+ if (ref ($_[0]) eq 'HASH') {
+ my $opts = shift @_;
+ if (defined ($$opts{-cutting}) && !$$opts{-cutting}) {
+ $$self{in_pod} = 1;
+ $$self{last_was_blank} = 1;
+ }
+ }
+
+ # Hand the remaining arguments to Pod::Simple's own parse_from_file.
+ my $retval = $self->Pod::Simple::parse_from_file (@_);
+
+ # Flush output, since Pod::Simple doesn't do this: select the output handle,
+ # turn on autoflush, print an empty string, then restore the old state.
+ # Ideally we should also close a file we had to open, but we can't tell.
+ my $fh = $self->output_fh ();
+ my $oldfh = select $fh;
+ my $oldflush = $|;
+ $| = 1;
+ print $fh '';
+ $| = $oldflush;
+ select $oldfh;
+ return $retval;
+}
+
+# Pod::Simple failed to provide this backward compatibility function, so
+# implement it ourselves. File handles are one of the inputs that
+# parse_from_file supports, so all arguments are simply forwarded to it.
+sub parse_from_filehandle {
+ my $self = shift;
+ $self->parse_from_file (@_);
+}
##############################################################################
# Module return value and documentation
Pod::Text - Convert POD data to formatted ASCII text
+=for stopwords
+alt stderr Allbery Sean Burke's Christiansen
+
=head1 SYNOPSIS
use Pod::Text;
special formatting controls or codes whatsoever, and its output is therefore
suitable for nearly any device.
-As a derived class from Pod::Parser, Pod::Text supports the same methods and
-interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
-new parser with C<< Pod::Text->new() >> and then calls either
-parse_from_filehandle() or parse_from_file().
+As a derived class from Pod::Simple, Pod::Text supports the same methods and
+interfaces. See L<Pod::Simple> for all the details; briefly, one creates a
+new parser with C<< Pod::Text->new() >> and then normally calls parse_file().
new() can take options, in the form of key/value pairs, that control the
behavior of the parser. The currently recognized options are:
arbitrary text documents, setting this to true may result in more pleasing
output.
+=item margin
+
+The width of the left margin in spaces. Defaults to 0. This is the margin
+for all text, including headings, not the amount by which regular text is
+indented; for the latter, see the I<indent> option. To set the right
+margin, see the I<width> option.
+
=item quotes
Sets the quote marks used to surround CE<lt>> text. If the value is a
consecutive whitespace in non-verbatim paragraphs is compressed into a
single space. Defaults to true.
+=item stderr
+
+Send error messages about invalid POD to standard error instead of
+appending a POD ERRORS section to the generated output.
+
=item width
The column at which to wrap text on the right-hand side. Defaults to 76.
=back
-The standard Pod::Parser method parse_from_filehandle() takes up to two
-arguments, the first being the file handle to read POD from and the second
-being the file handle to write the formatted output to. The first defaults
-to STDIN if not given, and the second defaults to STDOUT. The method
-parse_from_file() is almost identical, except that its two arguments are the
-input and output disk files instead. See L<Pod::Parser> for the specific
-details.
+The standard Pod::Simple method parse_file() takes one argument, the file or
+file handle to read from, and writes output to standard output unless that
+has been changed with the output_fh() method. See L<Pod::Simple> for the
+specific details and for other alternative interfaces.
=head1 DIAGNOSTICS
(F) The quote specification given (the quotes option to the constructor) was
invalid. A quote specification must be one, two, or four characters long.
-=item %s:%d: Unknown command paragraph: %s
-
-(W) The POD source contained a non-standard command paragraph (something of
-the form C<=command args>) that Pod::Man didn't know about. It was ignored.
-
-=item %s:%d: Unknown escape: %s
-
-(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
-know about.
-
-=item %s:%d: Unknown formatting code: %s
-
-(W) The POD source contained a non-standard formatting code (something of
-the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
-
-=item %s:%d: Unmatched =back
-
-(W) Pod::Text encountered a C<=back> command that didn't correspond to an
-C<=over> command.
-
=back
-=head1 RESTRICTIONS
-
-Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
-output, due to an internal implementation detail.
-
=head1 NOTES
This is a replacement for an earlier Pod::Text module written by Tom
-Christiansen. It has a revamped interface, since it now uses Pod::Parser,
+Christiansen. It has a revamped interface, since it now uses Pod::Simple,
but an interface roughly compatible with the old Pod::Text::pod2text()
function is still available. Please change to the new calling convention,
though.
=head1 SEE ALSO
-L<Pod::Parser>, L<Pod::Text::Termcap>, L<pod2text(1)>
+L<Pod::Simple>, L<Pod::Text::Termcap>, L<pod2text(1)>
+
+The current version of this module is always available from its web site at
+L<http://www.eyrie.org/~eagle/software/podlators/>. It is also part of the
+Perl core distribution as of 5.6.0.
=head1 AUTHOR
Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
Pod::Text by Tom Christiansen <tchrist@mox.perl.com> and its conversion to
-Pod::Parser by Brad Appleton <bradapp@enteract.com>.
+Pod::Parser by Brad Appleton <bradapp@enteract.com>. Sean Burke's initial
+conversion of Pod::Man to use Pod::Simple provided much-needed guidance on
+how to use Pod::Simple.
=head1 COPYRIGHT AND LICENSE
-Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>.
+Copyright 1999, 2000, 2001, 2002, 2004, 2006, 2008 Russ Allbery
+<rra@stanford.edu>.
This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.