1 #############################################################################
2 # Pod/PlainText.pm -- convert POD data to formatted ASCII text
4 # Derived from Tom Christiansen's Pod::PlainText module
5 # (with extensive modifications).
7 # Copyright (C) 1994-1999 Tom Christiansen. All rights reserved.
8 # This file is part of "PodParser". PodParser is free software;
9 # you can redistribute it and/or modify it under the same terms
11 #############################################################################
13 package Pod::PlainText;
15 use vars qw($VERSION);
16 $VERSION = 1.081; ## Current version of this package
17 require 5.004; ## requires this Perl version or later
21 pod2plaintext - function to convert POD data to formatted ASCII text
23 Pod::PlainText - a class for converting POD data to formatted ASCII text
28 pod2plaintext("perlfunc.pod");
34 @ISA = qw(Pod::PlainText);
37 ## constructor code ...
40 ## implementation of appropriate subclass methods ...
43 $parser = new MyParser;
44 @ARGV = ('-') unless (@ARGV > 0);
46 $parser->parse_from_file($_);
51 perl5.004, Pod::Select, Term::Cap, Exporter, Carp
59 Pod::PlainText is a module that can convert documentation in the POD
60 format (such as can be found throughout the Perl distribution) into
61 formatted ASCII. Termcap is optionally supported for
62 boldface/underline, and can be enabled via C<$Pod::PlainText::termcap=1>.
63 If termcap has not been enabled, then backspaces will be used to
64 simulate bold and underlined text.
66 A separate F<pod2plaintext> program is included that is primarily a wrapper
67 for C<Pod::PlainText::pod2plaintext()>.
69 The single function C<pod2plaintext()> can take one or two arguments. The first
70 should be the name of a file to read the pod from, or "<&STDIN" to read from
71 STDIN. A second argument, if provided, should be a filehandle glob where
72 output should be sent.
80 Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
82 Modified to derive from B<Pod::Parser> by
83 Brad Appleton E<lt>bradapp@enteract.comE<gt>
87 #############################################################################
95 use vars qw(@ISA @EXPORT %HTML_Escapes);
97 @ISA = qw(Exporter Pod::Select);
98 @EXPORT = qw(&pod2plaintext);
101 'amp' => '&', # ampersand
102 'lt' => '<', # left chevron, less-than
103 'gt' => '>', # right chevron, greater-than
104 'quot' => '"', # double quote
106 "Aacute" => "\xC1", # capital A, acute accent
107 "aacute" => "\xE1", # small a, acute accent
108 "Acirc" => "\xC2", # capital A, circumflex accent
109 "acirc" => "\xE2", # small a, circumflex accent
110 "AElig" => "\xC6", # capital AE diphthong (ligature)
111 "aelig" => "\xE6", # small ae diphthong (ligature)
112 "Agrave" => "\xC0", # capital A, grave accent
113 "agrave" => "\xE0", # small a, grave accent
114 "Aring" => "\xC5", # capital A, ring
115 "aring" => "\xE5", # small a, ring
116 "Atilde" => "\xC3", # capital A, tilde
117 "atilde" => "\xE3", # small a, tilde
118 "Auml" => "\xC4", # capital A, dieresis or umlaut mark
119 "auml" => "\xE4", # small a, dieresis or umlaut mark
120 "Ccedil" => "\xC7", # capital C, cedilla
121 "ccedil" => "\xE7", # small c, cedilla
122 "Eacute" => "\xC9", # capital E, acute accent
123 "eacute" => "\xE9", # small e, acute accent
124 "Ecirc" => "\xCA", # capital E, circumflex accent
125 "ecirc" => "\xEA", # small e, circumflex accent
126 "Egrave" => "\xC8", # capital E, grave accent
127 "egrave" => "\xE8", # small e, grave accent
128 "ETH" => "\xD0", # capital Eth, Icelandic
129 "eth" => "\xF0", # small eth, Icelandic
130 "Euml" => "\xCB", # capital E, dieresis or umlaut mark
131 "euml" => "\xEB", # small e, dieresis or umlaut mark
132 "Iacute" => "\xCD", # capital I, acute accent
133 "iacute" => "\xED", # small i, acute accent
134 "Icirc" => "\xCE", # capital I, circumflex accent
135 "icirc" => "\xEE", # small i, circumflex accent
136 "Igrave" => "\xCD", # capital I, grave accent
137 "igrave" => "\xED", # small i, grave accent
138 "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
139 "iuml" => "\xEF", # small i, dieresis or umlaut mark
140 "Ntilde" => "\xD1", # capital N, tilde
141 "ntilde" => "\xF1", # small n, tilde
142 "Oacute" => "\xD3", # capital O, acute accent
143 "oacute" => "\xF3", # small o, acute accent
144 "Ocirc" => "\xD4", # capital O, circumflex accent
145 "ocirc" => "\xF4", # small o, circumflex accent
146 "Ograve" => "\xD2", # capital O, grave accent
147 "ograve" => "\xF2", # small o, grave accent
148 "Oslash" => "\xD8", # capital O, slash
149 "oslash" => "\xF8", # small o, slash
150 "Otilde" => "\xD5", # capital O, tilde
151 "otilde" => "\xF5", # small o, tilde
152 "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
153 "ouml" => "\xF6", # small o, dieresis or umlaut mark
154 "szlig" => "\xDF", # small sharp s, German (sz ligature)
155 "THORN" => "\xDE", # capital THORN, Icelandic
156 "thorn" => "\xFE", # small thorn, Icelandic
157 "Uacute" => "\xDA", # capital U, acute accent
158 "uacute" => "\xFA", # small u, acute accent
159 "Ucirc" => "\xDB", # capital U, circumflex accent
160 "ucirc" => "\xFB", # small u, circumflex accent
161 "Ugrave" => "\xD9", # capital U, grave accent
162 "ugrave" => "\xF9", # small u, grave accent
163 "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
164 "uuml" => "\xFC", # small u, dieresis or umlaut mark
165 "Yacute" => "\xDD", # capital Y, acute accent
166 "yacute" => "\xFD", # small y, acute accent
167 "yuml" => "\xFF", # small y, dieresis or umlaut mark
169 "lchevron" => "\xAB", # left chevron (double less than)
170 "rchevron" => "\xBB", # right chevron (double greater than)
173 ##---------------------------------
174 ## Function definitions begin here
175 ##---------------------------------
177 ## Try to find #columns for the tty
178 my %NotUnix = map {($_ => 1)} qw(MacOS MSWin32 VMS MVS);
180 ((defined $ENV{TERMCAP}) && ($ENV{TERMCAP} =~ /co#(\d+)/)[0])
181 or ((defined $ENV{COLUMNS}) && $ENV{COLUMNS})
182 or (!$NotUnix{$^O} && (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0])
188 my ($infile, $outfile) = @_;
190 my $text_parser = new Pod::PlainText;
191 $text_parser->parse_from_file($infile, $outfile);
194 ##-------------------------------
195 ## Method definitions begin here
196 ##-------------------------------
200 my $class = ref($this) || $this;
202 my $self = {%params};
210 $self->SUPER::initialize();
216 my $out_fh = $self->output_handle();
217 if ($self->{NEEDSPACE}) {
219 $self->{NEEDSPACE} = 0;
226 my $map = $self->{FONTMAP};
227 return $line if $self->{USE_FORMAT};
228 if ($self->{TERMCAP}) {
229 $line = "$map->{BOLD}$line$map->{NORM}";
232 $line =~ s/(.)/$1\b$1/g;
234 # $line = "$map->{BOLD}$line$map->{NORM}" if $self->{ANSIFY};
241 my $map = $self->{FONTMAP};
242 return $line if $self->{USE_FORMAT};
243 if ($self->{TERMCAP}) {
244 $line = "$map->{UNDL}$line$map->{NORM}";
247 $line =~ s/(.)/$1\b_/g;
249 # $line = "$map->{UNDL}$line$map->{NORM}" if $self->{ANSIFY};
253 # Fill a paragraph including underlined and overstricken chars.
254 # It's not perfect for words longer than the margin, and it's probably
255 # slow, but it works.
260 my $indent_space = " " x $self->{INDENT};
261 my $marg = $self->{SCREEN} - $self->{INDENT};
262 my $line = $indent_space;
265 my $word_length = length;
266 $word_length -= 2 while /\010/g; # Subtract backspaces
268 if ($line_length + $word_length > $marg) {
269 $par .= $line . "\n";
270 $line= $indent_space . $_;
271 $line_length = $word_length;
278 $line_length += $word_length;
282 $par .= "$line\n" if length $line;
287 ## Handle a pending "item" paragraph. The paragraph (if given) is the
288 ## corresponding item text. (the item tag should be in $self->{ITEM}).
294 $cmd = '' unless (defined $cmd);
295 $_ = '' unless (defined $_);
296 my $out_fh = $self->output_handle();
297 return unless (defined $self->{ITEM});
298 my $paratag = $self->{ITEM};
299 my $prev_indent = $self->{INDENTS}->[-1] || $self->{DEF_INDENT};
302 #$self->rm_callbacks('*');
304 my $over = $self->{INDENT};
305 $over -= $prev_indent if ($prev_indent < $over);
306 if (length $cmd) { # tricked - this is another command
307 $self->output($paratag, INDENT => $prev_indent);
308 $self->command($cmd, $_);
310 elsif (/^\s+/o) { # verbatim
311 $self->output($paratag, INDENT => $prev_indent);
315 else { # plain textblock
316 $_ = $self->interpolate($_, $line);
318 if ((length $_) && (length($paratag) <= $over)) {
319 $self->IP_output($paratag, $_);
322 $self->output($paratag, INDENT => $prev_indent);
323 $self->output($_, REFORMAT => 1);
328 sub remap_whitespace {
331 tr/\000-\177/\200-\377/;
335 sub unmap_whitespace {
338 tr/\200-\377/\000-\177/;
346 my $out_fh = $self->output_handle();
347 my $tag_indent = $self->{INDENTS}->[-1] || $self->{DEF_INDENT};
348 my $tag_cols = $self->{SCREEN} - $tag_indent;
349 my $cols = $self->{SCREEN} - $self->{INDENT};
353 my $fmt_name = '_Pod_Text_IP_output_format_';
354 my $str = "format $fmt_name = \n"
355 . (" " x ($tag_indent))
356 . '@' . ('<' x ($self->{INDENT} - $tag_indent - 1))
357 . "^" . ("<" x ($cols - 1)) . "\n"
360 . (" " x ($self->{INDENT} - 2))
361 . "^" . ("<" x ($cols - 5)) . "\n"
363 #warn $str; warn "tag is $tag, _ is $_";
365 ## reset format (turn off warning about redefining a format)
370 select((select($out_fh), $~ = $fmt_name)[0]);
371 local($:) = ($self->curr_headings(1) eq 'SYNOPSIS') ? "\n " : $: ;
378 $_ = '' unless (defined $_);
379 return unless (length $_);
380 my $out_fh = $self->output_handle();
383 ## usage was $self->output($text, NAME=>VALUE, ...);
388 ## usage was $self->output($text, { NAME=>VALUE, ... } );
392 ## usage was $self->output($text, $number);
393 $options{"REFORMAT"} = shift;
396 $options{"INDENT"} = $self->{INDENT} unless (defined $options{"INDENT"});
397 if ((defined $options{"REFORMAT"}) && $options{"REFORMAT"}) {
398 my $cols = $self->{SCREEN} - $options{"INDENT"};
401 my $fmt_name = '_Pod_Text_output_format_';
402 my $str = "format $fmt_name = \n~~"
403 . (" " x ($options{"INDENT"} - 2))
404 . "^" . ("<" x ($cols - 5)) . "\n"
407 ## reset format (turn off warning about redefining a format)
412 select((select($out_fh), $~ = $fmt_name)[0]);
413 local($:) = ($self->curr_headings(1) eq 'SYNOPSIS') ? "\n " : $: ;
417 s/^/' ' x $options{"INDENT"}/gem;
427 my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
430 for ($i = 0; $i <= $#items; $i++) {
431 $retstr .= "C<$items[$i]>";
432 $retstr .= ", " if @items > 2 && $i != $#items;
433 $retstr .= " and " if $i+2 == @items;
436 $retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
437 . " elsewhere in this document ";
446 $self->{TERMCAP} = 0;
447 #$self->{USE_FORMAT} = 1;
455 if ($self->{TERMCAP} and (! defined $self->{SETUPTERMCAP})) {
456 $self->{SETUPTERMCAP} = 1;
457 my ($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
458 $self->{FONTMAP}->{UNDL} = $term->{'_us'};
459 $self->{FONTMAP}->{INV} = $term->{'_mr'};
460 $self->{FONTMAP}->{BOLD} = $term->{'_md'};
461 $self->{FONTMAP}->{NORM} = $term->{'_me'};
464 $self->{SCREEN} = &get_screen;
466 $self->{DEF_INDENT} = 4;
467 $self->{INDENTS} = [];
468 $self->{INDENT} = $self->{DEF_INDENT};
469 $self->{NEEDSPACE} = 0;
474 $self->item('', '', '', 0) if (defined $self->{ITEM});
479 my @begun = @{ $self->{BEGUN} };
480 return (@begun > 0) ? ($begun[-1] ne 'text') : 0;
488 $cmd = '' unless (defined $cmd);
489 $_ = '' unless (defined $_);
490 my $out_fh = $self->output_handle();
492 return if (($cmd ne 'end') and $self->begun_excluded());
493 return $self->item($cmd, $_, $line) if (defined $self->{ITEM});
494 $_ = $self->interpolate($_, $line);
497 return if ($cmd eq 'pod');
498 if ($cmd eq 'head1') {
501 # print $out_fh uc($_);
503 elsif ($cmd eq 'head2') {
506 #print ' ' x $self->{DEF_INDENT}, $_;
508 s/(\w)/\xA7 $1/ if $self->{FANCY};
509 print $out_fh ' ' x ($self->{DEF_INDENT}/2), $_, "\n";
511 elsif ($cmd eq 'over') {
512 /^[-+]?\d+$/ or $_ = $self->{DEF_INDENT};
513 push(@{$self->{INDENTS}}, $self->{INDENT});
514 $self->{INDENT} += ($_ + 0);
516 elsif ($cmd eq 'back') {
517 $self->{INDENT} = pop(@{$self->{INDENTS}});
518 unless (defined $self->{INDENT}) {
519 carp "Unmatched =back\n";
520 $self->{INDENT} = $self->{DEF_INDENT};
523 elsif ($cmd eq 'begin') {
524 my ($kind) = /^(\S*)/;
525 push( @{ $self->{BEGUN} }, $kind );
527 elsif ($cmd eq 'end') {
528 pop( @{ $self->{BEGUN} } );
530 elsif ($cmd eq 'for') {
531 $self->textblock($1) if /^text\b\s*(.*)$/s;
533 elsif ($cmd eq 'item') {
535 # s/\A(\s*)\*/$1\xb7/ if $self->{FANCY};
536 # s/^(\s*\*\s+)/$1 /;
538 #$self->add_callbacks('*', SUB => \&item);
541 carp "Unrecognized directive: $cmd\n";
549 return if $self->begun_excluded();
550 return $self->item('', $_, $line) if (defined $self->{ITEM});
552 #$self->{NEEDSPACE} = 1;
559 return if $self->begun_excluded();
560 return $self->item('', $text, $line) if (defined $self->{ITEM});
561 local($_) = $self->interpolate($text, $line);
564 $self->output($_, REFORMAT => 1);
567 sub interior_sequence {
573 my ($pre, $post) = ("`", "'");
574 ($pre, $post) = ($HTML_Escapes{"lchevron"}, $HTML_Escapes{"rchevron"})
575 if ((defined $self->{FANCY}) && $self->{FANCY});
576 $_ = $pre . $_ . $post;
578 elsif ($cmd eq 'E') {
579 if (defined $HTML_Escapes{$_}) {
580 $_ = $HTML_Escapes{$_};
583 carp "Unknown escape: E<$_>";
587 # elsif ($cmd eq 'B') {
588 # $_ = $self->bold($_);
590 elsif ($cmd eq 'I') {
591 # $_ = $self->italic($_);
594 elsif (($cmd eq 'X') || ($cmd eq 'Z')) {
597 elsif ($cmd eq 'S') {
598 # Escape whitespace until we are ready to print
599 #$_ = $self->remap_whitespace($_);
601 elsif ($cmd eq 'L') {
603 my ($text, $manpage, $sec, $ref) = ('', $_, '', '');
604 if (/\A(.*?)\|(.*)\Z/) {
608 if (/^\s*"\s*(.*)\s*"\s*$/o) {
609 ($manpage, $sec) = ('', "\"$1\"");
611 elsif (m|\s*/\s*|s) {
612 ($manpage, $sec) = split(/\s*\/\s*/, $_, 2);
615 $ref .= "the $manpage manpage" if (length $manpage);
617 elsif ($sec =~ /^\s*"\s*(.*)\s*"\s*$/o) {
618 $ref .= "the section on \"$1\"";
619 $ref .= " in the $manpage manpage" if (length $manpage);
622 $ref .= "the \"$sec\" entry";
623 $ref .= (length $manpage) ? " in the $manpage manpage"
627 #if ( m{^ ([a-zA-Z][^\s\/]+) (\([^\)]+\))? $}x ) {
628 # ## LREF: a manpage(3f)
629 # $_ = "the $1$2 manpage";
631 #elsif ( m{^ ([^/]+) / ([:\w]+(\(\))?) $}x ) {
632 # ## LREF: an =item on another manpage
633 # $_ = "the \"$2\" entry in the $1 manpage";
635 #elsif ( m{^ / ([:\w]+(\(\))?) $}x ) {
636 # ## LREF: an =item on this manpage
637 # $_ = $self->internal_lrefs($1);
639 #elsif ( m{^ (?: ([a-zA-Z]\S+?) / )? "?(.*?)"? $}x ) {
640 # ## LREF: a =head2 (head1?), maybe on a manpage, maybe right here
641 # ## the "func" can disambiguate
642 # $_ = ((defined $1) && $1)
643 # ? "the section on \"$2\" in the $1 manpage"
644 # : "the section on \"$2\"";