Change 'continuing anyway' to 'probably harmless'
[p5sagit/p5-mst-13.2.git] / lib / Pod / Text.pm
CommitLineData
69e00e79 1package Pod::Text;
2
69e00e79 3=head1 NAME
4
5Pod::Text - convert POD data to formatted ASCII text
6
7=head1 SYNOPSIS
8
9 use Pod::Text;
10
11 pod2text("perlfunc.pod");
12
13Also:
14
15 pod2text < input.pod
16
17=head1 DESCRIPTION
18
19Pod::Text is a module that can convert documentation in the POD format (such
20as can be found throughout the Perl distribution) into formatted ASCII.
21Termcap is optionally supported for boldface/underline, and can enabled via
22C<$Pod::Text::termcap=1>. If termcap has not been enabled, then backspaces
23will be used to simulate bold and underlined text.
24
25A separate F<pod2text> program is included that is primarily a wrapper for
26Pod::Text.
27
28The single function C<pod2text()> can take one or two arguments. The first
1fef88e7 29should be the name of a file to read the pod from, or "E<lt>&STDIN" to read from
69e00e79 30STDIN. A second argument, if provided, should be a filehandle glob where
31output should be sent.
32
33=head1 AUTHOR
34
1fef88e7 35Tom Christiansen E<lt>F<tchrist@mox.perl.com>E<gt>
69e00e79 36
37=head1 TODO
38
39Cleanup work. The input and output locations need to be more flexible,
40termcap shouldn't be a global variable, and the terminal speed needs to
41be properly calculated.
42
43=cut
44
45use Term::Cap;
46require Exporter;
47@ISA = Exporter;
48@EXPORT = qw(pod2text);
49
f02a87df 50use vars qw($VERSION);
78ff9ed7 51$VERSION = "1.0202";
f02a87df 52
69e00e79 53$termcap=0;
54
55#$use_format=1;
56
57$UNDL = "\x1b[4m";
58$INV = "\x1b[7m";
59$BOLD = "\x1b[1m";
60$NORM = "\x1b[0m";
61
62sub pod2text {
63local($file,*OUTPUT) = @_;
64*OUTPUT = *STDOUT if @_<2;
65
66if($termcap and !$setuptermcap) {
67 $setuptermcap=1;
68
69 my($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
70 $UNDL = $term->{'_us'};
71 $INV = $term->{'_mr'};
72 $BOLD = $term->{'_md'};
73 $NORM = $term->{'_me'};
74}
75
76$SCREEN = ($_[0] =~ /^-(\d+)/ && (shift, $1))
69e00e79 77 || $ENV{COLUMNS}
36477c24 78 || ($ENV{TERMCAP} =~ /co#(\d+)/)[0]
69e00e79 79 || (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0]
80 || 72;
81
82$/ = "";
83
84$FANCY = 0;
85
86$cutting = 1;
87$DEF_INDENT = 4;
88$indent = $DEF_INDENT;
89$needspace = 0;
8c634b6e 90$begun = "";
69e00e79 91
2e917f57 92open(IN, $file) || die "Couldn't open $file: $!";
69e00e79 93
94POD_DIRECTIVE: while (<IN>) {
95 if ($cutting) {
96 next unless /^=/;
97 $cutting = 0;
98 }
8c634b6e 99 if ($begun) {
100 if (/^=end\s+$begun/) {
101 $begun = "";
102 }
103 elsif ($begun eq "text") {
78ff9ed7 104 print OUTPUT $_;
8c634b6e 105 }
106 next;
107 }
69e00e79 108 1 while s{^(.*?)(\t+)(.*)$}{
109 $1
110 . (' ' x (length($2) * 8 - length($1) % 8))
111 . $3
112 }me;
113 # Translate verbatim paragraph
114 if (/^\s/) {
115 $needspace = 1;
116 output($_);
117 next;
118 }
119
f02a87df 120 if (/^=for\s+(\S+)\s*(.*)/s) {
8c634b6e 121 if ($1 eq "text") {
78ff9ed7 122 print OUTPUT $2,"";
8c634b6e 123 } else {
124 # ignore unknown for
125 }
126 next;
127 }
f02a87df 128 elsif (/^=begin\s+(\S+)\s*(.*)/s) {
8c634b6e 129 $begun = $1;
130 if ($1 eq "text") {
78ff9ed7 131 print OUTPUT $2."";
8c634b6e 132 }
133 next;
134 }
135
69e00e79 136sub prepare_for_output {
137
138 s/\s*$/\n/;
139 &init_noremap;
140
141 # need to hide E<> first; they're processed in clear_noremap
142 s/(E<[^<>]+>)/noremap($1)/ge;
143 $maxnest = 10;
144 while ($maxnest-- && /[A-Z]</) {
145 unless ($FANCY) {
55497cff 146 s/C<(.*?)>/`$1'/sg;
69e00e79 147 } else {
55497cff 148 s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/sge;
69e00e79 149 }
150 # s/[IF]<(.*?)>/italic($1)/ge;
55497cff 151 s/I<(.*?)>/*$1*/sg;
69e00e79 152 # s/[CB]<(.*?)>/bold($1)/ge;
55497cff 153 s/X<.*?>//sg;
69e00e79 154 # LREF: a manpage(3f)
155 s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g;
156 # LREF: an =item on another manpage
157 s{
158 L<
159 ([^/]+)
160 /
161 (
162 [:\w]+
163 (\(\))?
164 )
165 >
166 } {the "$2" entry in the $1 manpage}gx;
167
168 # LREF: an =item on this manpage
169 s{
170 ((?:
171 L<
172 /
173 (
174 [:\w]+
175 (\(\))?
176 )
177 >
178 (,?\s+(and\s+)?)?
179 )+)
180 } { internal_lrefs($1) }gex;
181
182 # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
183 # the "func" can disambiguate
184 s{
185 L<
186 (?:
187 ([a-zA-Z]\S+?) /
188 )?
189 "?(.*?)"?
190 >
191 }{
192 do {
193 $1 # if no $1, assume it means on this page.
194 ? "the section on \"$2\" in the $1 manpage"
195 : "the section on \"$2\""
196 }
55497cff 197 }sgex;
69e00e79 198
55497cff 199 s/[A-Z]<(.*?)>/$1/sg;
69e00e79 200 }
201 clear_noremap(1);
202}
203
204 &prepare_for_output;
205
206 if (s/^=//) {
207 # $needspace = 0; # Assume this.
208 # s/\n/ /g;
209 ($Cmd, $_) = split(' ', $_, 2);
210 # clear_noremap(1);
211 if ($Cmd eq 'cut') {
212 $cutting = 1;
213 }
78ff9ed7 214 elsif ($Cmd eq 'pod') {
215 $cutting = 0;
216 }
69e00e79 217 elsif ($Cmd eq 'head1') {
218 makespace();
219 print OUTPUT;
220 # print OUTPUT uc($_);
221 }
222 elsif ($Cmd eq 'head2') {
223 makespace();
224 # s/(\w+)/\u\L$1/g;
225 #print ' ' x $DEF_INDENT, $_;
226 # print "\xA7";
227 s/(\w)/\xA7 $1/ if $FANCY;
228 print OUTPUT ' ' x ($DEF_INDENT/2), $_, "\n";
229 }
230 elsif ($Cmd eq 'over') {
231 push(@indent,$indent);
232 $indent += ($_ + 0) || $DEF_INDENT;
233 }
234 elsif ($Cmd eq 'back') {
235 $indent = pop(@indent);
236 warn "Unmatched =back\n" unless defined $indent;
237 $needspace = 1;
238 }
239 elsif ($Cmd eq 'item') {
240 makespace();
241 # s/\A(\s*)\*/$1\xb7/ if $FANCY;
242 # s/^(\s*\*\s+)/$1 /;
243 {
244 if (length() + 3 < $indent) {
245 my $paratag = $_;
2e917f57 246 $_ = <IN>;
69e00e79 247 if (/^=/) { # tricked!
248 local($indent) = $indent[$#index - 1] || $DEF_INDENT;
249 output($paratag);
250 redo POD_DIRECTIVE;
251 }
252 &prepare_for_output;
253 IP_output($paratag, $_);
254 } else {
255 local($indent) = $indent[$#index - 1] || $DEF_INDENT;
256 output($_);
257 }
258 }
259 }
260 else {
261 warn "Unrecognized directive: $Cmd\n";
262 }
263 }
264 else {
265 # clear_noremap(1);
266 makespace();
267 output($_, 1);
268 }
269}
270
271close(IN);
272
273}
274
275#########################################################################
276
277sub makespace {
278 if ($needspace) {
279 print OUTPUT "\n";
280 $needspace = 0;
281 }
282}
283
284sub bold {
285 my $line = shift;
286 return $line if $use_format;
287 if($termcap) {
288 $line = "$BOLD$line$NORM";
289 } else {
290 $line =~ s/(.)/$1\b$1/g;
291 }
292# $line = "$BOLD$line$NORM" if $ansify;
293 return $line;
294}
295
296sub italic {
297 my $line = shift;
298 return $line if $use_format;
299 if($termcap) {
300 $line = "$UNDL$line$NORM";
301 } else {
302 $line =~ s/(.)/$1\b_/g;
303 }
304# $line = "$UNDL$line$NORM" if $ansify;
305 return $line;
306}
307
308# Fill a paragraph including underlined and overstricken chars.
309# It's not perfect for words longer than the margin, and it's probably
310# slow, but it works.
311sub fill {
312 local $_ = shift;
313 my $par = "";
314 my $indent_space = " " x $indent;
315 my $marg = $SCREEN-$indent;
316 my $line = $indent_space;
317 my $line_length;
318 foreach (split) {
319 my $word_length = length;
320 $word_length -= 2 while /\010/g; # Subtract backspaces
321
322 if ($line_length + $word_length > $marg) {
323 $par .= $line . "\n";
324 $line= $indent_space . $_;
325 $line_length = $word_length;
326 }
327 else {
328 if ($line_length) {
329 $line_length++;
330 $line .= " ";
331 }
332 $line_length += $word_length;
333 $line .= $_;
334 }
335 }
336 $par .= "$line\n" if $line;
337 $par .= "\n";
338 return $par;
339}
340
341sub IP_output {
342 local($tag, $_) = @_;
343 local($tag_indent) = $indent[$#index - 1] || $DEF_INDENT;
344 $tag_cols = $SCREEN - $tag_indent;
345 $cols = $SCREEN - $indent;
346 $tag =~ s/\s*$//;
347 s/\s+/ /g;
348 s/^ //;
349 $str = "format OUTPUT = \n"
350 . (" " x ($tag_indent))
351 . '@' . ('<' x ($indent - $tag_indent - 1))
352 . "^" . ("<" x ($cols - 1)) . "\n"
353 . '$tag, $_'
354 . "\n~~"
355 . (" " x ($indent-2))
356 . "^" . ("<" x ($cols - 5)) . "\n"
357 . '$_' . "\n\n.\n1";
358 #warn $str; warn "tag is $tag, _ is $_";
359 eval $str || die;
360 write OUTPUT;
361}
362
363sub output {
364 local($_, $reformat) = @_;
365 if ($reformat) {
366 $cols = $SCREEN - $indent;
367 s/\s+/ /g;
368 s/^ //;
369 $str = "format OUTPUT = \n~~"
370 . (" " x ($indent-2))
371 . "^" . ("<" x ($cols - 5)) . "\n"
372 . '$_' . "\n\n.\n1";
373 eval $str || die;
374 write OUTPUT;
375 } else {
376 s/^/' ' x $indent/gem;
377 s/^\s+\n$/\n/gm;
378 print OUTPUT;
379 }
380}
381
382sub noremap {
383 local($thing_to_hide) = shift;
384 $thing_to_hide =~ tr/\000-\177/\200-\377/;
385 return $thing_to_hide;
386}
387
388sub init_noremap {
389 die "unmatched init" if $mapready++;
26fb054b 390 #mask off high bit characters in input stream
391 s/([\200-\377])/"E<".ord($1).">"/ge;
69e00e79 392}
393
394sub clear_noremap {
395 my $ready_to_print = $_[0];
396 die "unmatched clear" unless $mapready--;
397 tr/\200-\377/\000-\177/;
398 # now for the E<>s, which have been hidden until now
399 # otherwise the interative \w<> processing would have
400 # been hosed by the E<gt>
401 s {
26fb054b 402 E<
403 (
404 ( \d+ )
405 | ( [A-Za-z]+ )
406 )
69e00e79 407 >
408 } {
409 do {
26fb054b 410 defined $2
411 ? chr($2)
412 :
413 defined $HTML_Escapes{$3}
414 ? do { $HTML_Escapes{$3} }
69e00e79 415 : do {
f02a87df 416 warn "Unknown escape: E<$1> in $_";
69e00e79 417 "E<$1>";
418 }
419 }
420 }egx if $ready_to_print;
421}
422
423sub internal_lrefs {
424 local($_) = shift;
425 s{L</([^>]+)>}{$1}g;
426 my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
427 my $retstr = "the ";
428 my $i;
429 for ($i = 0; $i <= $#items; $i++) {
430 $retstr .= "C<$items[$i]>";
431 $retstr .= ", " if @items > 2 && $i != $#items;
432 $retstr .= " and " if $i+2 == @items;
433 }
434
435 $retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
436 . " elsewhere in this document ";
437
438 return $retstr;
439
440}
441
442BEGIN {
443
444%HTML_Escapes = (
445 'amp' => '&', # ampersand
446 'lt' => '<', # left chevron, less-than
447 'gt' => '>', # right chevron, greater-than
448 'quot' => '"', # double quote
449
450 "Aacute" => "\xC1", # capital A, acute accent
451 "aacute" => "\xE1", # small a, acute accent
452 "Acirc" => "\xC2", # capital A, circumflex accent
453 "acirc" => "\xE2", # small a, circumflex accent
454 "AElig" => "\xC6", # capital AE diphthong (ligature)
455 "aelig" => "\xE6", # small ae diphthong (ligature)
456 "Agrave" => "\xC0", # capital A, grave accent
457 "agrave" => "\xE0", # small a, grave accent
458 "Aring" => "\xC5", # capital A, ring
459 "aring" => "\xE5", # small a, ring
460 "Atilde" => "\xC3", # capital A, tilde
461 "atilde" => "\xE3", # small a, tilde
462 "Auml" => "\xC4", # capital A, dieresis or umlaut mark
463 "auml" => "\xE4", # small a, dieresis or umlaut mark
464 "Ccedil" => "\xC7", # capital C, cedilla
465 "ccedil" => "\xE7", # small c, cedilla
466 "Eacute" => "\xC9", # capital E, acute accent
467 "eacute" => "\xE9", # small e, acute accent
468 "Ecirc" => "\xCA", # capital E, circumflex accent
469 "ecirc" => "\xEA", # small e, circumflex accent
470 "Egrave" => "\xC8", # capital E, grave accent
471 "egrave" => "\xE8", # small e, grave accent
472 "ETH" => "\xD0", # capital Eth, Icelandic
473 "eth" => "\xF0", # small eth, Icelandic
474 "Euml" => "\xCB", # capital E, dieresis or umlaut mark
475 "euml" => "\xEB", # small e, dieresis or umlaut mark
476 "Iacute" => "\xCD", # capital I, acute accent
477 "iacute" => "\xED", # small i, acute accent
478 "Icirc" => "\xCE", # capital I, circumflex accent
479 "icirc" => "\xEE", # small i, circumflex accent
480 "Igrave" => "\xCD", # capital I, grave accent
481 "igrave" => "\xED", # small i, grave accent
482 "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
483 "iuml" => "\xEF", # small i, dieresis or umlaut mark
484 "Ntilde" => "\xD1", # capital N, tilde
485 "ntilde" => "\xF1", # small n, tilde
486 "Oacute" => "\xD3", # capital O, acute accent
487 "oacute" => "\xF3", # small o, acute accent
488 "Ocirc" => "\xD4", # capital O, circumflex accent
489 "ocirc" => "\xF4", # small o, circumflex accent
490 "Ograve" => "\xD2", # capital O, grave accent
491 "ograve" => "\xF2", # small o, grave accent
492 "Oslash" => "\xD8", # capital O, slash
493 "oslash" => "\xF8", # small o, slash
494 "Otilde" => "\xD5", # capital O, tilde
495 "otilde" => "\xF5", # small o, tilde
496 "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
497 "ouml" => "\xF6", # small o, dieresis or umlaut mark
498 "szlig" => "\xDF", # small sharp s, German (sz ligature)
499 "THORN" => "\xDE", # capital THORN, Icelandic
500 "thorn" => "\xFE", # small thorn, Icelandic
501 "Uacute" => "\xDA", # capital U, acute accent
502 "uacute" => "\xFA", # small u, acute accent
503 "Ucirc" => "\xDB", # capital U, circumflex accent
504 "ucirc" => "\xFB", # small u, circumflex accent
505 "Ugrave" => "\xD9", # capital U, grave accent
506 "ugrave" => "\xF9", # small u, grave accent
507 "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
508 "uuml" => "\xFC", # small u, dieresis or umlaut mark
509 "Yacute" => "\xDD", # capital Y, acute accent
510 "yacute" => "\xFD", # small y, acute accent
511 "yuml" => "\xFF", # small y, dieresis or umlaut mark
512
513 "lchevron" => "\xAB", # left chevron (double less than)
514 "rchevron" => "\xBB", # right chevron (double greater than)
515);
516}
517
5181;