5 Pod::Text - convert POD data to formatted ASCII text
11 pod2text("perlfunc.pod");
15 pod2text [B<-a>] [B<->I<width>] < input.pod
19 Pod::Text is a module that can convert documentation in the POD format (such
20 as can be found throughout the Perl distribution) into formatted ASCII.
21 Termcap is optionally supported for boldface/underline, and can be enabled via
22 C<$Pod::Text::termcap=1>. If termcap has not been enabled, then backspaces
23 will be used to simulate bold and underlined text.
25 A separate F<pod2text> program is included that is primarily a wrapper for
28 The single function C<pod2text()> can take the optional options B<-a>
29 for an alternative output format, then a B<->I<width> option with the
30 max terminal width, followed by one or two arguments. The first
31 should be the name of a file to read the pod from, or "E<lt>&STDIN" to read from
32 STDIN. A second argument, if provided, should be a filehandle glob where
33 output should be sent.
37 Tom Christiansen E<lt>F<tchrist@mox.perl.com>E<gt>
41 Cleanup work. The input and output locations need to be more flexible,
42 termcap shouldn't be a global variable, and the terminal speed needs to
43 be properly calculated.
50 @EXPORT = qw(pod2text);
52 use vars qw($VERSION);
55 use locale; # make \w work right in non-ASCII lands
69 shift if $opt_alt_format = ($_[0] eq '-a');
71 if($termcap and !$setuptermcap) {
74 my($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
75 $UNDL = $term->{'_us'};
76 $INV = $term->{'_mr'};
77 $BOLD = $term->{'_md'};
78 $NORM = $term->{'_me'};
81 $SCREEN = ($_[0] =~ /^-(\d+)/ && (shift, $1))
83 || ($ENV{TERMCAP} =~ /co#(\d+)/)[0]
84 || ($^O ne 'MSWin32' && $^O ne 'dos' && (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0])
87 @_ = ("<&STDIN") unless @_;
88 local($file,*OUTPUT) = @_;
89 *OUTPUT = *STDOUT if @_<2;
92 $: = " \n" if $opt_alt_format; # Do not break ``-L/lib/'' into ``- L/lib/''.
100 $indent = $DEF_INDENT;
104 open(IN, $file) || die "Couldn't open $file: $!";
106 POD_DIRECTIVE: while (<IN>) {
112 if (/^=end\s+$begun/) {
115 elsif ($begun eq "text") {
120 1 while s{^(.*?)(\t+)(.*)$}{
122 . (' ' x (length($2) * 8 - length($1) % 8))
125 # Translate verbatim paragraph
131 if (/^=for\s+(\S+)\s*(.*)/s) {
139 elsif (/^=begin\s+(\S+)\s*(.*)/s) {
147 sub prepare_for_output {
152 # need to hide E<> first; they're processed in clear_noremap
153 s/(E<[^<>]+>)/noremap($1)/ge;
155 while ($maxnest-- && /[A-Z]</) {
157 if ($opt_alt_format) {
158 s/[BC]<(.*?)>/``$1''/sg;
164 s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/sge;
166 # s/[IF]<(.*?)>/italic($1)/ge;
168 # s/[CB]<(.*?)>/bold($1)/ge;
171 # LREF: a la HREF L<show this text|man/section>
172 s:L<([^|>]+)\|[^>]+>:$1:g;
174 # LREF: a manpage(3f)
175 s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g;
176 # LREF: an =item on another manpage
186 } {the "$2" entry in the $1 manpage}gx;
188 # LREF: an =item on this manpage
200 } { internal_lrefs($1) }gex;
202 # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
203 # the "func" can disambiguate
213 $1 # if no $1, assume it means on this page.
214 ? "the section on \"$2\" in the $1 manpage"
215 : "the section on \"$2\""
219 s/[A-Z]<(.*?)>/$1/sg;
227 # $needspace = 0; # Assume this.
229 ($Cmd, $_) = split(' ', $_, 2);
234 elsif ($Cmd eq 'pod') {
237 elsif ($Cmd eq 'head1') {
239 if ($opt_alt_format) {
241 s/^(.+?)[ \t]*$/==== $1 ====/;
244 # print OUTPUT uc($_);
245 $needspace = $opt_alt_format;
247 elsif ($Cmd eq 'head2') {
250 #print ' ' x $DEF_INDENT, $_;
252 s/(\w)/\xA7 $1/ if $FANCY;
253 if ($opt_alt_format) {
254 s/^(.+?)[ \t]*$/== $1 ==/;
255 print OUTPUT "\n", $_;
257 print OUTPUT ' ' x ($DEF_INDENT/2), $_, "\n";
259 $needspace = $opt_alt_format;
261 elsif ($Cmd eq 'over') {
262 push(@indent,$indent);
263 $indent += ($_ + 0) || $DEF_INDENT;
265 elsif ($Cmd eq 'back') {
266 $indent = pop(@indent);
267 warn "Unmatched =back\n" unless defined $indent;
269 elsif ($Cmd eq 'item') {
271 # s/\A(\s*)\*/$1\xb7/ if $FANCY;
272 # s/^(\s*\*\s+)/$1 /;
274 if (length() + 3 < $indent) {
277 if (/^[=\s]/) { # tricked!, or verbatim paragraph
278 local($indent) = $indent[$#indent - 1] || $DEF_INDENT;
283 IP_output($paratag, $_);
285 local($indent) = $indent[$#indent - 1] || $DEF_INDENT;
291 warn "Unrecognized directive: $Cmd\n";
305 #########################################################################
316 return $line if $use_format;
318 $line = "$BOLD$line$NORM";
320 $line =~ s/(.)/$1\b$1/g;
322 # $line = "$BOLD$line$NORM" if $ansify;
328 return $line if $use_format;
330 $line = "$UNDL$line$NORM";
332 $line =~ s/(.)/$1\b_/g;
334 # $line = "$UNDL$line$NORM" if $ansify;
338 # Fill a paragraph including underlined and overstricken chars.
339 # It's not perfect for words longer than the margin, and it's probably
340 # slow, but it works.
344 my $indent_space = " " x $indent;
345 my $marg = $SCREEN-$indent;
346 my $line = $indent_space;
349 my $word_length = length;
350 $word_length -= 2 while /\010/g; # Subtract backspaces
352 if ($line_length + $word_length > $marg) {
353 $par .= $line . "\n";
354 $line= $indent_space . $_;
355 $line_length = $word_length;
362 $line_length += $word_length;
366 $par .= "$line\n" if $line;
372 local($tag, $_) = @_;
373 local($tag_indent) = $indent[$#indent - 1] || $DEF_INDENT;
374 $tag_cols = $SCREEN - $tag_indent;
375 $cols = $SCREEN - $indent;
379 $str = "format OUTPUT = \n"
380 . (($opt_alt_format && $tag_indent > 1)
381 ? ":" . " " x ($tag_indent - 1)
382 : " " x ($tag_indent))
383 . '@' . ('<' x ($indent - $tag_indent - 1))
384 . "^" . ("<" x ($cols - 1)) . "\n"
387 . (" " x ($indent-2))
388 . "^" . ("<" x ($cols - 5)) . "\n"
390 #warn $str; warn "tag is $tag, _ is $_";
396 local($_, $reformat) = @_;
398 $cols = $SCREEN - $indent;
401 $str = "format OUTPUT = \n~~"
402 . (" " x ($indent-2))
403 . "^" . ("<" x ($cols - 5)) . "\n"
408 s/^/' ' x $indent/gem;
410 s/^ /: /s if defined($reformat) && $opt_alt_format;
416 local($thing_to_hide) = shift;
417 $thing_to_hide =~ tr/\000-\177/\200-\377/;
418 return $thing_to_hide;
422 die "unmatched init" if $mapready++;
423 #mask off high bit characters in input stream
424 s/([\200-\377])/"E<".ord($1).">"/ge;
428 my $ready_to_print = $_[0];
429 die "unmatched clear" unless $mapready--;
430 tr/\200-\377/\000-\177/;
431 # now for the E<>s, which have been hidden until now
432 # otherwise the iterative \w<> processing would have
433 # been hosed by the E<gt>
446 defined $HTML_Escapes{$3}
447 ? do { $HTML_Escapes{$3} }
449 warn "Unknown escape: E<$1> in $_";
453 }egx if $ready_to_print;
459 my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
462 for ($i = 0; $i <= $#items; $i++) {
463 $retstr .= "C<$items[$i]>";
464 $retstr .= ", " if @items > 2 && $i != $#items;
465 $retstr .= " and " if $i+2 == @items;
468 $retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
469 . " elsewhere in this document ";
478 'amp' => '&', # ampersand
479 'lt' => '<', # left chevron, less-than
480 'gt' => '>', # right chevron, greater-than
481 'quot' => '"', # double quote
483 "Aacute" => "\xC1", # capital A, acute accent
484 "aacute" => "\xE1", # small a, acute accent
485 "Acirc" => "\xC2", # capital A, circumflex accent
486 "acirc" => "\xE2", # small a, circumflex accent
487 "AElig" => "\xC6", # capital AE diphthong (ligature)
488 "aelig" => "\xE6", # small ae diphthong (ligature)
489 "Agrave" => "\xC0", # capital A, grave accent
490 "agrave" => "\xE0", # small a, grave accent
491 "Aring" => "\xC5", # capital A, ring
492 "aring" => "\xE5", # small a, ring
493 "Atilde" => "\xC3", # capital A, tilde
494 "atilde" => "\xE3", # small a, tilde
495 "Auml" => "\xC4", # capital A, dieresis or umlaut mark
496 "auml" => "\xE4", # small a, dieresis or umlaut mark
497 "Ccedil" => "\xC7", # capital C, cedilla
498 "ccedil" => "\xE7", # small c, cedilla
499 "Eacute" => "\xC9", # capital E, acute accent
500 "eacute" => "\xE9", # small e, acute accent
501 "Ecirc" => "\xCA", # capital E, circumflex accent
502 "ecirc" => "\xEA", # small e, circumflex accent
503 "Egrave" => "\xC8", # capital E, grave accent
504 "egrave" => "\xE8", # small e, grave accent
505 "ETH" => "\xD0", # capital Eth, Icelandic
506 "eth" => "\xF0", # small eth, Icelandic
507 "Euml" => "\xCB", # capital E, dieresis or umlaut mark
508 "euml" => "\xEB", # small e, dieresis or umlaut mark
509 "Iacute" => "\xCD", # capital I, acute accent
510 "iacute" => "\xED", # small i, acute accent
511 "Icirc" => "\xCE", # capital I, circumflex accent
512 "icirc" => "\xEE", # small i, circumflex accent
513 "Igrave" => "\xCC", # capital I, grave accent
514 "igrave" => "\xEC", # small i, grave accent
515 "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
516 "iuml" => "\xEF", # small i, dieresis or umlaut mark
517 "Ntilde" => "\xD1", # capital N, tilde
518 "ntilde" => "\xF1", # small n, tilde
519 "Oacute" => "\xD3", # capital O, acute accent
520 "oacute" => "\xF3", # small o, acute accent
521 "Ocirc" => "\xD4", # capital O, circumflex accent
522 "ocirc" => "\xF4", # small o, circumflex accent
523 "Ograve" => "\xD2", # capital O, grave accent
524 "ograve" => "\xF2", # small o, grave accent
525 "Oslash" => "\xD8", # capital O, slash
526 "oslash" => "\xF8", # small o, slash
527 "Otilde" => "\xD5", # capital O, tilde
528 "otilde" => "\xF5", # small o, tilde
529 "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
530 "ouml" => "\xF6", # small o, dieresis or umlaut mark
531 "szlig" => "\xDF", # small sharp s, German (sz ligature)
532 "THORN" => "\xDE", # capital THORN, Icelandic
533 "thorn" => "\xFE", # small thorn, Icelandic
534 "Uacute" => "\xDA", # capital U, acute accent
535 "uacute" => "\xFA", # small u, acute accent
536 "Ucirc" => "\xDB", # capital U, circumflex accent
537 "ucirc" => "\xFB", # small u, circumflex accent
538 "Ugrave" => "\xD9", # capital U, grave accent
539 "ugrave" => "\xF9", # small u, grave accent
540 "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
541 "uuml" => "\xFC", # small u, dieresis or umlaut mark
542 "Yacute" => "\xDD", # capital Y, acute accent
543 "yacute" => "\xFD", # small y, acute accent
544 "yuml" => "\xFF", # small y, dieresis or umlaut mark
546 "lchevron" => "\xAB", # left chevron (double less than)
547 "rchevron" => "\xBB", # right chevron (double greater than)