major pod update from Tom Christiansen
[p5sagit/p5-mst-13.2.git] / lib / Pod / PlainText.pm
CommitLineData
360aca43 1#############################################################################
2# Pod/PlainText.pm -- convert POD data to formatted ASCII text
3#
4# Derived from Tom Christiansen's Pod::PlainText module
5# (with extensive modifications).
6#
7# Copyright (C) 1994-1999 Tom Christiansen. All rights reserved.
8# This file is part of "PodParser". PodParser is free software;
9# you can redistribute it and/or modify it under the same terms
10# as Perl itself.
11#############################################################################
12
13package Pod::PlainText;
14
15use vars qw($VERSION);
e9fdc7d2 16$VERSION = 1.081; ## Current version of this package
360aca43 17require 5.004; ## requires this Perl version or later
18
19=head1 NAME
20
21pod2plaintext - function to convert POD data to formatted ASCII text
22
23Pod::PlainText - a class for converting POD data to formatted ASCII text
24
25=head1 SYNOPSIS
26
27 use Pod::PlainText;
28 pod2plaintext("perlfunc.pod");
29
30or
31
32 use Pod::PlainText;
33 package MyParser;
34 @ISA = qw(Pod::PlainText);
35
36 sub new {
37 ## constructor code ...
38 }
39
40 ## implementation of appropriate subclass methods ...
41
42 package main;
43 $parser = new MyParser;
44 @ARGV = ('-') unless (@ARGV > 0);
45 for (@ARGV) {
46 $parser->parse_from_file($_);
47 }
48
49=head1 REQUIRES
50
51perl5.004, Pod::Select, Term::Cap, Exporter, Carp
52
53=head1 EXPORTS
54
55pod2plaintext()
56
57=head1 DESCRIPTION
58
59Pod::PlainText is a module that can convert documentation in the POD
60format (such as can be found throughout the Perl distribution) into
61formatted ASCII. Termcap is optionally supported for
62boldface/underline, and can be enabled via C<$Pod::PlainText::termcap=1>.
63If termcap has not been enabled, then backspaces will be used to
64simulate bold and underlined text.
65
66A separate F<pod2plaintext> program is included that is primarily a wrapper
67for C<Pod::PlainText::pod2plaintext()>.
68
69The single function C<pod2plaintext()> can take one or two arguments. The first
70should be the name of a file to read the pod from, or "<&STDIN" to read from
71STDIN. A second argument, if provided, should be a filehandle glob where
72output should be sent.
73
74=head1 SEE ALSO
75
76L<Pod::Parser>.
77
78=head1 AUTHOR
79
80Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
81
82Modified to derive from B<Pod::Parser> by
83Brad Appleton E<lt>bradapp@enteract.comE<gt>
84
85=cut
86
87#############################################################################
88
89use strict;
90#use diagnostics;
91use Carp;
92use Exporter;
93use Pod::Select;
94use Term::Cap;
95use vars qw(@ISA @EXPORT %HTML_Escapes);
96
97@ISA = qw(Exporter Pod::Select);
98@EXPORT = qw(&pod2plaintext);
99
100%HTML_Escapes = (
101 'amp' => '&', # ampersand
102 'lt' => '<', # left chevron, less-than
103 'gt' => '>', # right chevron, greater-than
104 'quot' => '"', # double quote
105
106 "Aacute" => "\xC1", # capital A, acute accent
107 "aacute" => "\xE1", # small a, acute accent
108 "Acirc" => "\xC2", # capital A, circumflex accent
109 "acirc" => "\xE2", # small a, circumflex accent
110 "AElig" => "\xC6", # capital AE diphthong (ligature)
111 "aelig" => "\xE6", # small ae diphthong (ligature)
112 "Agrave" => "\xC0", # capital A, grave accent
113 "agrave" => "\xE0", # small a, grave accent
114 "Aring" => "\xC5", # capital A, ring
115 "aring" => "\xE5", # small a, ring
116 "Atilde" => "\xC3", # capital A, tilde
117 "atilde" => "\xE3", # small a, tilde
118 "Auml" => "\xC4", # capital A, dieresis or umlaut mark
119 "auml" => "\xE4", # small a, dieresis or umlaut mark
120 "Ccedil" => "\xC7", # capital C, cedilla
121 "ccedil" => "\xE7", # small c, cedilla
122 "Eacute" => "\xC9", # capital E, acute accent
123 "eacute" => "\xE9", # small e, acute accent
124 "Ecirc" => "\xCA", # capital E, circumflex accent
125 "ecirc" => "\xEA", # small e, circumflex accent
126 "Egrave" => "\xC8", # capital E, grave accent
127 "egrave" => "\xE8", # small e, grave accent
128 "ETH" => "\xD0", # capital Eth, Icelandic
129 "eth" => "\xF0", # small eth, Icelandic
130 "Euml" => "\xCB", # capital E, dieresis or umlaut mark
131 "euml" => "\xEB", # small e, dieresis or umlaut mark
132 "Iacute" => "\xCD", # capital I, acute accent
133 "iacute" => "\xED", # small i, acute accent
134 "Icirc" => "\xCE", # capital I, circumflex accent
135 "icirc" => "\xEE", # small i, circumflex accent
136 "Igrave" => "\xCD", # capital I, grave accent
137 "igrave" => "\xED", # small i, grave accent
138 "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
139 "iuml" => "\xEF", # small i, dieresis or umlaut mark
140 "Ntilde" => "\xD1", # capital N, tilde
141 "ntilde" => "\xF1", # small n, tilde
142 "Oacute" => "\xD3", # capital O, acute accent
143 "oacute" => "\xF3", # small o, acute accent
144 "Ocirc" => "\xD4", # capital O, circumflex accent
145 "ocirc" => "\xF4", # small o, circumflex accent
146 "Ograve" => "\xD2", # capital O, grave accent
147 "ograve" => "\xF2", # small o, grave accent
148 "Oslash" => "\xD8", # capital O, slash
149 "oslash" => "\xF8", # small o, slash
150 "Otilde" => "\xD5", # capital O, tilde
151 "otilde" => "\xF5", # small o, tilde
152 "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
153 "ouml" => "\xF6", # small o, dieresis or umlaut mark
154 "szlig" => "\xDF", # small sharp s, German (sz ligature)
155 "THORN" => "\xDE", # capital THORN, Icelandic
156 "thorn" => "\xFE", # small thorn, Icelandic
157 "Uacute" => "\xDA", # capital U, acute accent
158 "uacute" => "\xFA", # small u, acute accent
159 "Ucirc" => "\xDB", # capital U, circumflex accent
160 "ucirc" => "\xFB", # small u, circumflex accent
161 "Ugrave" => "\xD9", # capital U, grave accent
162 "ugrave" => "\xF9", # small u, grave accent
163 "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
164 "uuml" => "\xFC", # small u, dieresis or umlaut mark
165 "Yacute" => "\xDD", # capital Y, acute accent
166 "yacute" => "\xFD", # small y, acute accent
167 "yuml" => "\xFF", # small y, dieresis or umlaut mark
168
169 "lchevron" => "\xAB", # left chevron (double less than)
170 "rchevron" => "\xBB", # right chevron (double greater than)
171);
172
173##---------------------------------
174## Function definitions begin here
175##---------------------------------
176
177 ## Try to find #columns for the tty
178my %NotUnix = map {($_ => 1)} qw(MacOS MSWin32 VMS MVS);
179sub get_screen {
180 ((defined $ENV{TERMCAP}) && ($ENV{TERMCAP} =~ /co#(\d+)/)[0])
181 or ((defined $ENV{COLUMNS}) && $ENV{COLUMNS})
182 or (!$NotUnix{$^O} && (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0])
183 or 72;
184
185}
186
187sub pod2plaintext {
188 my ($infile, $outfile) = @_;
189 local $_;
190 my $text_parser = new Pod::PlainText;
191 $text_parser->parse_from_file($infile, $outfile);
192}
193
194##-------------------------------
195## Method definitions begin here
196##-------------------------------
197
198sub new {
199 my $this = shift;
200 my $class = ref($this) || $this;
201 my %params = @_;
202 my $self = {%params};
203 bless $self, $class;
204 $self->initialize();
205 return $self;
206}
207
208sub initialize {
209 my $self = shift;
210 $self->SUPER::initialize();
211 return;
212}
213
214sub makespace {
215 my $self = shift;
216 my $out_fh = $self->output_handle();
217 if ($self->{NEEDSPACE}) {
218 print $out_fh "\n";
219 $self->{NEEDSPACE} = 0;
220 }
221}
222
223sub bold {
224 my $self = shift;
225 my $line = shift;
226 my $map = $self->{FONTMAP};
227 return $line if $self->{USE_FORMAT};
228 if ($self->{TERMCAP}) {
229 $line = "$map->{BOLD}$line$map->{NORM}";
230 }
231 else {
232 $line =~ s/(.)/$1\b$1/g;
233 }
234# $line = "$map->{BOLD}$line$map->{NORM}" if $self->{ANSIFY};
235 return $line;
236}
237
238sub italic {
239 my $self = shift;
240 my $line = shift;
241 my $map = $self->{FONTMAP};
242 return $line if $self->{USE_FORMAT};
243 if ($self->{TERMCAP}) {
244 $line = "$map->{UNDL}$line$map->{NORM}";
245 }
246 else {
247 $line =~ s/(.)/$1\b_/g;
248 }
249# $line = "$map->{UNDL}$line$map->{NORM}" if $self->{ANSIFY};
250 return $line;
251}
252
253# Fill a paragraph including underlined and overstricken chars.
254# It's not perfect for words longer than the margin, and it's probably
255# slow, but it works.
256sub fill {
257 my $self = shift;
258 local $_ = shift;
259 my $par = "";
260 my $indent_space = " " x $self->{INDENT};
261 my $marg = $self->{SCREEN} - $self->{INDENT};
262 my $line = $indent_space;
263 my $line_length;
264 foreach (split) {
265 my $word_length = length;
266 $word_length -= 2 while /\010/g; # Subtract backspaces
267
268 if ($line_length + $word_length > $marg) {
269 $par .= $line . "\n";
270 $line= $indent_space . $_;
271 $line_length = $word_length;
272 }
273 else {
274 if ($line_length) {
275 $line_length++;
276 $line .= " ";
277 }
278 $line_length += $word_length;
279 $line .= $_;
280 }
281 }
e9fdc7d2 282 $par .= "$line\n" if length $line;
360aca43 283 $par .= "\n";
284 return $par;
285}
286
287## Handle a pending "item" paragraph. The paragraph (if given) is the
288## corresponding item text. (the item tag should be in $self->{ITEM}).
289sub item {
290 my $self = shift;
291 my $cmd = shift;
292 local $_ = shift;
293 my $line = shift;
294 $cmd = '' unless (defined $cmd);
295 $_ = '' unless (defined $_);
296 my $out_fh = $self->output_handle();
297 return unless (defined $self->{ITEM});
298 my $paratag = $self->{ITEM};
299 my $prev_indent = $self->{INDENTS}->[-1] || $self->{DEF_INDENT};
300 ## reset state
301 undef $self->{ITEM};
302 #$self->rm_callbacks('*');
303
304 my $over = $self->{INDENT};
305 $over -= $prev_indent if ($prev_indent < $over);
306 if (length $cmd) { # tricked - this is another command
307 $self->output($paratag, INDENT => $prev_indent);
308 $self->command($cmd, $_);
309 }
310 elsif (/^\s+/o) { # verbatim
311 $self->output($paratag, INDENT => $prev_indent);
312 s/\s+\Z//;
313 $self->verbatim($_);
314 }
315 else { # plain textblock
316 $_ = $self->interpolate($_, $line);
317 s/\s+\Z//;
318 if ((length $_) && (length($paratag) <= $over)) {
319 $self->IP_output($paratag, $_);
320 }
321 else {
322 $self->output($paratag, INDENT => $prev_indent);
323 $self->output($_, REFORMAT => 1);
324 }
325 }
326}
327
328sub remap_whitespace {
329 my $self = shift;
330 local($_) = shift;
331 tr/\000-\177/\200-\377/;
332 return $_;
333}
334
335sub unmap_whitespace {
336 my $self = shift;
337 local($_) = shift;
338 tr/\200-\377/\000-\177/;
339 return $_;
340}
341
342sub IP_output {
343 my $self = shift;
344 my $tag = shift;
345 local($_) = @_;
346 my $out_fh = $self->output_handle();
347 my $tag_indent = $self->{INDENTS}->[-1] || $self->{DEF_INDENT};
348 my $tag_cols = $self->{SCREEN} - $tag_indent;
349 my $cols = $self->{SCREEN} - $self->{INDENT};
350 $tag =~ s/\s*$//;
351 s/\s+/ /g;
352 s/^ //;
353 my $fmt_name = '_Pod_Text_IP_output_format_';
354 my $str = "format $fmt_name = \n"
355 . (" " x ($tag_indent))
356 . '@' . ('<' x ($self->{INDENT} - $tag_indent - 1))
357 . "^" . ("<" x ($cols - 1)) . "\n"
358 . '$tag, $_'
359 . "\n~~"
360 . (" " x ($self->{INDENT} - 2))
361 . "^" . ("<" x ($cols - 5)) . "\n"
362 . '$_' . "\n\n.\n1";
363 #warn $str; warn "tag is $tag, _ is $_";
364 {
365 ## reset format (turn off warning about redefining a format)
366 local($^W) = 0;
367 eval $str;
368 croak if ($@);
369 }
370 select((select($out_fh), $~ = $fmt_name)[0]);
371 local($:) = ($self->curr_headings(1) eq 'SYNOPSIS') ? "\n " : $: ;
372 write $out_fh;
373}
374
375sub output {
376 my $self = shift;
377 local $_ = shift;
378 $_ = '' unless (defined $_);
379 return unless (length $_);
380 my $out_fh = $self->output_handle();
381 my %options;
382 if (@_ > 1) {
383 ## usage was $self->output($text, NAME=>VALUE, ...);
384 %options = @_;
385 }
386 elsif (@_ == 1) {
387 if (ref $_[0]) {
388 ## usage was $self->output($text, { NAME=>VALUE, ... } );
389 %options = %{$_[0]};
390 }
391 else {
392 ## usage was $self->output($text, $number);
393 $options{"REFORMAT"} = shift;
394 }
395 }
396 $options{"INDENT"} = $self->{INDENT} unless (defined $options{"INDENT"});
397 if ((defined $options{"REFORMAT"}) && $options{"REFORMAT"}) {
398 my $cols = $self->{SCREEN} - $options{"INDENT"};
399 s/\s+/ /g;
400 s/^ //;
401 my $fmt_name = '_Pod_Text_output_format_';
402 my $str = "format $fmt_name = \n~~"
403 . (" " x ($options{"INDENT"} - 2))
404 . "^" . ("<" x ($cols - 5)) . "\n"
405 . '$_' . "\n\n.\n1";
406 {
407 ## reset format (turn off warning about redefining a format)
408 local($^W) = 0;
409 eval $str;
410 croak if ($@);
411 }
412 select((select($out_fh), $~ = $fmt_name)[0]);
413 local($:) = ($self->curr_headings(1) eq 'SYNOPSIS') ? "\n " : $: ;
414 write $out_fh;
415 }
416 else {
417 s/^/' ' x $options{"INDENT"}/gem;
418 s/^\s+\n$/\n/gm;
419 print $out_fh $_;
420 }
421}
422
423sub internal_lrefs {
424 my $self = shift;
425 local $_ = shift;
426 s{L</([^>]+)>}{$1}g;
427 my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
428 my $retstr = "the ";
429 my $i;
430 for ($i = 0; $i <= $#items; $i++) {
431 $retstr .= "C<$items[$i]>";
432 $retstr .= ", " if @items > 2 && $i != $#items;
433 $retstr .= " and " if $i+2 == @items;
434 }
435
436 $retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
437 . " elsewhere in this document ";
438
439 return $retstr;
440}
441
442sub begin_pod {
443 my $self = shift;
444
445 $self->{BEGUN} = [];
446 $self->{TERMCAP} = 0;
447 #$self->{USE_FORMAT} = 1;
448
449 $self->{FONTMAP} = {
450 UNDL => "\x1b[4m",
451 INV => "\x1b[7m",
452 BOLD => "\x1b[1m",
453 NORM => "\x1b[0m",
454 };
455 if ($self->{TERMCAP} and (! defined $self->{SETUPTERMCAP})) {
456 $self->{SETUPTERMCAP} = 1;
457 my ($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
458 $self->{FONTMAP}->{UNDL} = $term->{'_us'};
459 $self->{FONTMAP}->{INV} = $term->{'_mr'};
460 $self->{FONTMAP}->{BOLD} = $term->{'_md'};
461 $self->{FONTMAP}->{NORM} = $term->{'_me'};
462 }
463
464 $self->{SCREEN} = &get_screen;
465 $self->{FANCY} = 0;
466 $self->{DEF_INDENT} = 4;
467 $self->{INDENTS} = [];
468 $self->{INDENT} = $self->{DEF_INDENT};
469 $self->{NEEDSPACE} = 0;
470}
471
472sub end_pod {
473 my $self = shift;
474 $self->item('', '', '', 0) if (defined $self->{ITEM});
475}
476
477sub begun_excluded {
478 my $self = shift;
479 my @begun = @{ $self->{BEGUN} };
480 return (@begun > 0) ? ($begun[-1] ne 'text') : 0;
481}
482
483sub command {
484 my $self = shift;
485 my $cmd = shift;
486 local $_ = shift;
487 my $line = shift;
488 $cmd = '' unless (defined $cmd);
489 $_ = '' unless (defined $_);
490 my $out_fh = $self->output_handle();
491
492 return if (($cmd ne 'end') and $self->begun_excluded());
493 return $self->item($cmd, $_, $line) if (defined $self->{ITEM});
494 $_ = $self->interpolate($_, $line);
495 s/\s+\Z/\n/;
496
497 return if ($cmd eq 'pod');
498 if ($cmd eq 'head1') {
499 $self->makespace();
500 print $out_fh $_;
501 # print $out_fh uc($_);
502 }
503 elsif ($cmd eq 'head2') {
504 $self->makespace();
505 # s/(\w+)/\u\L$1/g;
506 #print ' ' x $self->{DEF_INDENT}, $_;
507 # print "\xA7";
508 s/(\w)/\xA7 $1/ if $self->{FANCY};
509 print $out_fh ' ' x ($self->{DEF_INDENT}/2), $_, "\n";
510 }
511 elsif ($cmd eq 'over') {
512 /^[-+]?\d+$/ or $_ = $self->{DEF_INDENT};
513 push(@{$self->{INDENTS}}, $self->{INDENT});
514 $self->{INDENT} += ($_ + 0);
515 }
516 elsif ($cmd eq 'back') {
517 $self->{INDENT} = pop(@{$self->{INDENTS}});
518 unless (defined $self->{INDENT}) {
519 carp "Unmatched =back\n";
520 $self->{INDENT} = $self->{DEF_INDENT};
521 }
522 }
523 elsif ($cmd eq 'begin') {
524 my ($kind) = /^(\S*)/;
525 push( @{ $self->{BEGUN} }, $kind );
526 }
527 elsif ($cmd eq 'end') {
528 pop( @{ $self->{BEGUN} } );
529 }
530 elsif ($cmd eq 'for') {
531 $self->textblock($1) if /^text\b\s*(.*)$/s;
532 }
533 elsif ($cmd eq 'item') {
534 $self->makespace();
535 # s/\A(\s*)\*/$1\xb7/ if $self->{FANCY};
536 # s/^(\s*\*\s+)/$1 /;
537 $self->{ITEM} = $_;
538 #$self->add_callbacks('*', SUB => \&item);
539 }
540 else {
541 carp "Unrecognized directive: $cmd\n";
542 }
543}
544
545sub verbatim {
546 my $self = shift;
547 local $_ = shift;
548 my $line = shift;
549 return if $self->begun_excluded();
550 return $self->item('', $_, $line) if (defined $self->{ITEM});
551 $self->output($_);
552 #$self->{NEEDSPACE} = 1;
553}
554
555sub textblock {
556 my $self = shift;
557 my $text = shift;
558 my $line = shift;
559 return if $self->begun_excluded();
560 return $self->item('', $text, $line) if (defined $self->{ITEM});
561 local($_) = $self->interpolate($text, $line);
562 s/\s*\Z/\n/;
563 $self->makespace();
564 $self->output($_, REFORMAT => 1);
565}
566
567sub interior_sequence {
568 my $self = shift;
569 my $cmd = shift;
570 my $arg = shift;
571 local($_) = $arg;
572 if ($cmd eq 'C') {
573 my ($pre, $post) = ("`", "'");
574 ($pre, $post) = ($HTML_Escapes{"lchevron"}, $HTML_Escapes{"rchevron"})
575 if ((defined $self->{FANCY}) && $self->{FANCY});
576 $_ = $pre . $_ . $post;
577 }
578 elsif ($cmd eq 'E') {
579 if (defined $HTML_Escapes{$_}) {
580 $_ = $HTML_Escapes{$_};
581 }
582 else {
583 carp "Unknown escape: E<$_>";
584 $_ = "E<$_>";
585 }
586 # }
587 # elsif ($cmd eq 'B') {
588 # $_ = $self->bold($_);
589 }
590 elsif ($cmd eq 'I') {
591 # $_ = $self->italic($_);
592 $_ = "*" . $_ . "*";
593 }
594 elsif (($cmd eq 'X') || ($cmd eq 'Z')) {
595 $_ = '';
596 }
597 elsif ($cmd eq 'S') {
598 # Escape whitespace until we are ready to print
599 #$_ = $self->remap_whitespace($_);
600 }
601 elsif ($cmd eq 'L') {
602 s/\s+/ /g;
603 my ($text, $manpage, $sec, $ref) = ('', $_, '', '');
604 if (/\A(.*?)\|(.*)\Z/) {
605 $text = $1;
606 $manpage = $_ = $2;
607 }
608 if (/^\s*"\s*(.*)\s*"\s*$/o) {
609 ($manpage, $sec) = ('', "\"$1\"");
610 }
611 elsif (m|\s*/\s*|s) {
612 ($manpage, $sec) = split(/\s*\/\s*/, $_, 2);
613 }
614 if (! length $sec) {
615 $ref .= "the $manpage manpage" if (length $manpage);
616 }
617 elsif ($sec =~ /^\s*"\s*(.*)\s*"\s*$/o) {
618 $ref .= "the section on \"$1\"";
619 $ref .= " in the $manpage manpage" if (length $manpage);
620 }
621 else {
622 $ref .= "the \"$sec\" entry";
623 $ref .= (length $manpage) ? " in the $manpage manpage"
624 : " in this manpage"
625 }
626 $_ = $text || $ref;
627 #if ( m{^ ([a-zA-Z][^\s\/]+) (\([^\)]+\))? $}x ) {
628 # ## LREF: a manpage(3f)
629 # $_ = "the $1$2 manpage";
630 #}
631 #elsif ( m{^ ([^/]+) / ([:\w]+(\(\))?) $}x ) {
632 # ## LREF: an =item on another manpage
633 # $_ = "the \"$2\" entry in the $1 manpage";
634 #}
635 #elsif ( m{^ / ([:\w]+(\(\))?) $}x ) {
636 # ## LREF: an =item on this manpage
637 # $_ = $self->internal_lrefs($1);
638 #}
639 #elsif ( m{^ (?: ([a-zA-Z]\S+?) / )? "?(.*?)"? $}x ) {
640 # ## LREF: a =head2 (head1?), maybe on a manpage, maybe right here
641 # ## the "func" can disambiguate
642 # $_ = ((defined $1) && $1)
643 # ? "the section on \"$2\" in the $1 manpage"
644 # : "the section on \"$2\"";
645 #}
646 }
647 return $_;
648}
649
6501;