5.002-style -w cleanup
[p5sagit/p5-mst-13.2.git] / lib / Pod / Text.pm
CommitLineData
69e00e79 1package Pod::Text;
2
3# Version 1.01
4
5=head1 NAME
6
7Pod::Text - convert POD data to formatted ASCII text
8
9=head1 SYNOPSIS
10
11 use Pod::Text;
12
13 pod2text("perlfunc.pod");
14
15Also:
16
17 pod2text < input.pod
18
19=head1 DESCRIPTION
20
21Pod::Text is a module that can convert documentation in the POD format (such
22as can be found throughout the Perl distribution) into formatted ASCII.
23Termcap is optionally supported for boldface/underline, and can enabled via
24C<$Pod::Text::termcap=1>. If termcap has not been enabled, then backspaces
25will be used to simulate bold and underlined text.
26
27A separate F<pod2text> program is included that is primarily a wrapper for
28Pod::Text.
29
30The single function C<pod2text()> can take one or two arguments. The first
31should be the name of a file to read the pod from, or "<&STDIN" to read from
32STDIN. A second argument, if provided, should be a filehandle glob where
33output should be sent.
34
35=head1 AUTHOR
36
37Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
38
39=head1 TODO
40
41Cleanup work. The input and output locations need to be more flexible,
42termcap shouldn't be a global variable, and the terminal speed needs to
43be properly calculated.
44
45=cut
46
47use Term::Cap;
48require Exporter;
49@ISA = Exporter;
50@EXPORT = qw(pod2text);
51
52$termcap=0;
53
54#$use_format=1;
55
56$UNDL = "\x1b[4m";
57$INV = "\x1b[7m";
58$BOLD = "\x1b[1m";
59$NORM = "\x1b[0m";
60
61sub pod2text {
62local($file,*OUTPUT) = @_;
63*OUTPUT = *STDOUT if @_<2;
64
65if($termcap and !$setuptermcap) {
66 $setuptermcap=1;
67
68 my($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
69 $UNDL = $term->{'_us'};
70 $INV = $term->{'_mr'};
71 $BOLD = $term->{'_md'};
72 $NORM = $term->{'_me'};
73}
74
75$SCREEN = ($_[0] =~ /^-(\d+)/ && (shift, $1))
76 || ($ENV{TERMCAP} =~ /co#(\d+)/)[0]
77 || $ENV{COLUMNS}
78 || (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0]
79 || 72;
80
81$/ = "";
82
83$FANCY = 0;
84
85$cutting = 1;
86$DEF_INDENT = 4;
87$indent = $DEF_INDENT;
88$needspace = 0;
89
90open(IN, shift) || die "Couldn't open file!\n";
91
92POD_DIRECTIVE: while (<IN>) {
93 if ($cutting) {
94 next unless /^=/;
95 $cutting = 0;
96 }
97 1 while s{^(.*?)(\t+)(.*)$}{
98 $1
99 . (' ' x (length($2) * 8 - length($1) % 8))
100 . $3
101 }me;
102 # Translate verbatim paragraph
103 if (/^\s/) {
104 $needspace = 1;
105 output($_);
106 next;
107 }
108
109sub prepare_for_output {
110
111 s/\s*$/\n/;
112 &init_noremap;
113
114 # need to hide E<> first; they're processed in clear_noremap
115 s/(E<[^<>]+>)/noremap($1)/ge;
116 $maxnest = 10;
117 while ($maxnest-- && /[A-Z]</) {
118 unless ($FANCY) {
119 s/C<(.*?)>/`$1'/g;
120 } else {
121 s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/ge;
122 }
123 # s/[IF]<(.*?)>/italic($1)/ge;
124 s/I<(.*?)>/*$1*/g;
125 # s/[CB]<(.*?)>/bold($1)/ge;
126 s/X<.*?>//g;
127 # LREF: a manpage(3f)
128 s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g;
129 # LREF: an =item on another manpage
130 s{
131 L<
132 ([^/]+)
133 /
134 (
135 [:\w]+
136 (\(\))?
137 )
138 >
139 } {the "$2" entry in the $1 manpage}gx;
140
141 # LREF: an =item on this manpage
142 s{
143 ((?:
144 L<
145 /
146 (
147 [:\w]+
148 (\(\))?
149 )
150 >
151 (,?\s+(and\s+)?)?
152 )+)
153 } { internal_lrefs($1) }gex;
154
155 # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
156 # the "func" can disambiguate
157 s{
158 L<
159 (?:
160 ([a-zA-Z]\S+?) /
161 )?
162 "?(.*?)"?
163 >
164 }{
165 do {
166 $1 # if no $1, assume it means on this page.
167 ? "the section on \"$2\" in the $1 manpage"
168 : "the section on \"$2\""
169 }
170 }gex;
171
172 s/[A-Z]<(.*?)>/$1/g;
173 }
174 clear_noremap(1);
175}
176
177 &prepare_for_output;
178
179 if (s/^=//) {
180 # $needspace = 0; # Assume this.
181 # s/\n/ /g;
182 ($Cmd, $_) = split(' ', $_, 2);
183 # clear_noremap(1);
184 if ($Cmd eq 'cut') {
185 $cutting = 1;
186 }
187 elsif ($Cmd eq 'head1') {
188 makespace();
189 print OUTPUT;
190 # print OUTPUT uc($_);
191 }
192 elsif ($Cmd eq 'head2') {
193 makespace();
194 # s/(\w+)/\u\L$1/g;
195 #print ' ' x $DEF_INDENT, $_;
196 # print "\xA7";
197 s/(\w)/\xA7 $1/ if $FANCY;
198 print OUTPUT ' ' x ($DEF_INDENT/2), $_, "\n";
199 }
200 elsif ($Cmd eq 'over') {
201 push(@indent,$indent);
202 $indent += ($_ + 0) || $DEF_INDENT;
203 }
204 elsif ($Cmd eq 'back') {
205 $indent = pop(@indent);
206 warn "Unmatched =back\n" unless defined $indent;
207 $needspace = 1;
208 }
209 elsif ($Cmd eq 'item') {
210 makespace();
211 # s/\A(\s*)\*/$1\xb7/ if $FANCY;
212 # s/^(\s*\*\s+)/$1 /;
213 {
214 if (length() + 3 < $indent) {
215 my $paratag = $_;
216 $_ = <>;
217 if (/^=/) { # tricked!
218 local($indent) = $indent[$#index - 1] || $DEF_INDENT;
219 output($paratag);
220 redo POD_DIRECTIVE;
221 }
222 &prepare_for_output;
223 IP_output($paratag, $_);
224 } else {
225 local($indent) = $indent[$#index - 1] || $DEF_INDENT;
226 output($_);
227 }
228 }
229 }
230 else {
231 warn "Unrecognized directive: $Cmd\n";
232 }
233 }
234 else {
235 # clear_noremap(1);
236 makespace();
237 output($_, 1);
238 }
239}
240
241close(IN);
242
243}
244
245#########################################################################
246
247sub makespace {
248 if ($needspace) {
249 print OUTPUT "\n";
250 $needspace = 0;
251 }
252}
253
254sub bold {
255 my $line = shift;
256 return $line if $use_format;
257 if($termcap) {
258 $line = "$BOLD$line$NORM";
259 } else {
260 $line =~ s/(.)/$1\b$1/g;
261 }
262# $line = "$BOLD$line$NORM" if $ansify;
263 return $line;
264}
265
266sub italic {
267 my $line = shift;
268 return $line if $use_format;
269 if($termcap) {
270 $line = "$UNDL$line$NORM";
271 } else {
272 $line =~ s/(.)/$1\b_/g;
273 }
274# $line = "$UNDL$line$NORM" if $ansify;
275 return $line;
276}
277
278# Fill a paragraph including underlined and overstricken chars.
279# It's not perfect for words longer than the margin, and it's probably
280# slow, but it works.
281sub fill {
282 local $_ = shift;
283 my $par = "";
284 my $indent_space = " " x $indent;
285 my $marg = $SCREEN-$indent;
286 my $line = $indent_space;
287 my $line_length;
288 foreach (split) {
289 my $word_length = length;
290 $word_length -= 2 while /\010/g; # Subtract backspaces
291
292 if ($line_length + $word_length > $marg) {
293 $par .= $line . "\n";
294 $line= $indent_space . $_;
295 $line_length = $word_length;
296 }
297 else {
298 if ($line_length) {
299 $line_length++;
300 $line .= " ";
301 }
302 $line_length += $word_length;
303 $line .= $_;
304 }
305 }
306 $par .= "$line\n" if $line;
307 $par .= "\n";
308 return $par;
309}
310
311sub IP_output {
312 local($tag, $_) = @_;
313 local($tag_indent) = $indent[$#index - 1] || $DEF_INDENT;
314 $tag_cols = $SCREEN - $tag_indent;
315 $cols = $SCREEN - $indent;
316 $tag =~ s/\s*$//;
317 s/\s+/ /g;
318 s/^ //;
319 $str = "format OUTPUT = \n"
320 . (" " x ($tag_indent))
321 . '@' . ('<' x ($indent - $tag_indent - 1))
322 . "^" . ("<" x ($cols - 1)) . "\n"
323 . '$tag, $_'
324 . "\n~~"
325 . (" " x ($indent-2))
326 . "^" . ("<" x ($cols - 5)) . "\n"
327 . '$_' . "\n\n.\n1";
328 #warn $str; warn "tag is $tag, _ is $_";
329 eval $str || die;
330 write OUTPUT;
331}
332
333sub output {
334 local($_, $reformat) = @_;
335 if ($reformat) {
336 $cols = $SCREEN - $indent;
337 s/\s+/ /g;
338 s/^ //;
339 $str = "format OUTPUT = \n~~"
340 . (" " x ($indent-2))
341 . "^" . ("<" x ($cols - 5)) . "\n"
342 . '$_' . "\n\n.\n1";
343 eval $str || die;
344 write OUTPUT;
345 } else {
346 s/^/' ' x $indent/gem;
347 s/^\s+\n$/\n/gm;
348 print OUTPUT;
349 }
350}
351
352sub noremap {
353 local($thing_to_hide) = shift;
354 $thing_to_hide =~ tr/\000-\177/\200-\377/;
355 return $thing_to_hide;
356}
357
358sub init_noremap {
359 die "unmatched init" if $mapready++;
360 if ( /[\200-\377]/ ) {
361 warn "hit bit char in input stream";
362 }
363}
364
365sub clear_noremap {
366 my $ready_to_print = $_[0];
367 die "unmatched clear" unless $mapready--;
368 tr/\200-\377/\000-\177/;
369 # now for the E<>s, which have been hidden until now
370 # otherwise the interative \w<> processing would have
371 # been hosed by the E<gt>
372 s {
373 E<
374 ( [A-Za-z]+ )
375 >
376 } {
377 do {
378 defined $HTML_Escapes{$1}
379 ? do { $HTML_Escapes{$1} }
380 : do {
381 warn "Unknown escape: $& in $_";
382 "E<$1>";
383 }
384 }
385 }egx if $ready_to_print;
386}
387
388sub internal_lrefs {
389 local($_) = shift;
390 s{L</([^>]+)>}{$1}g;
391 my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
392 my $retstr = "the ";
393 my $i;
394 for ($i = 0; $i <= $#items; $i++) {
395 $retstr .= "C<$items[$i]>";
396 $retstr .= ", " if @items > 2 && $i != $#items;
397 $retstr .= " and " if $i+2 == @items;
398 }
399
400 $retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
401 . " elsewhere in this document ";
402
403 return $retstr;
404
405}
406
407BEGIN {
408
409%HTML_Escapes = (
410 'amp' => '&', # ampersand
411 'lt' => '<', # left chevron, less-than
412 'gt' => '>', # right chevron, greater-than
413 'quot' => '"', # double quote
414
415 "Aacute" => "\xC1", # capital A, acute accent
416 "aacute" => "\xE1", # small a, acute accent
417 "Acirc" => "\xC2", # capital A, circumflex accent
418 "acirc" => "\xE2", # small a, circumflex accent
419 "AElig" => "\xC6", # capital AE diphthong (ligature)
420 "aelig" => "\xE6", # small ae diphthong (ligature)
421 "Agrave" => "\xC0", # capital A, grave accent
422 "agrave" => "\xE0", # small a, grave accent
423 "Aring" => "\xC5", # capital A, ring
424 "aring" => "\xE5", # small a, ring
425 "Atilde" => "\xC3", # capital A, tilde
426 "atilde" => "\xE3", # small a, tilde
427 "Auml" => "\xC4", # capital A, dieresis or umlaut mark
428 "auml" => "\xE4", # small a, dieresis or umlaut mark
429 "Ccedil" => "\xC7", # capital C, cedilla
430 "ccedil" => "\xE7", # small c, cedilla
431 "Eacute" => "\xC9", # capital E, acute accent
432 "eacute" => "\xE9", # small e, acute accent
433 "Ecirc" => "\xCA", # capital E, circumflex accent
434 "ecirc" => "\xEA", # small e, circumflex accent
435 "Egrave" => "\xC8", # capital E, grave accent
436 "egrave" => "\xE8", # small e, grave accent
437 "ETH" => "\xD0", # capital Eth, Icelandic
438 "eth" => "\xF0", # small eth, Icelandic
439 "Euml" => "\xCB", # capital E, dieresis or umlaut mark
440 "euml" => "\xEB", # small e, dieresis or umlaut mark
441 "Iacute" => "\xCD", # capital I, acute accent
442 "iacute" => "\xED", # small i, acute accent
443 "Icirc" => "\xCE", # capital I, circumflex accent
444 "icirc" => "\xEE", # small i, circumflex accent
445 "Igrave" => "\xCD", # capital I, grave accent
446 "igrave" => "\xED", # small i, grave accent
447 "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
448 "iuml" => "\xEF", # small i, dieresis or umlaut mark
449 "Ntilde" => "\xD1", # capital N, tilde
450 "ntilde" => "\xF1", # small n, tilde
451 "Oacute" => "\xD3", # capital O, acute accent
452 "oacute" => "\xF3", # small o, acute accent
453 "Ocirc" => "\xD4", # capital O, circumflex accent
454 "ocirc" => "\xF4", # small o, circumflex accent
455 "Ograve" => "\xD2", # capital O, grave accent
456 "ograve" => "\xF2", # small o, grave accent
457 "Oslash" => "\xD8", # capital O, slash
458 "oslash" => "\xF8", # small o, slash
459 "Otilde" => "\xD5", # capital O, tilde
460 "otilde" => "\xF5", # small o, tilde
461 "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
462 "ouml" => "\xF6", # small o, dieresis or umlaut mark
463 "szlig" => "\xDF", # small sharp s, German (sz ligature)
464 "THORN" => "\xDE", # capital THORN, Icelandic
465 "thorn" => "\xFE", # small thorn, Icelandic
466 "Uacute" => "\xDA", # capital U, acute accent
467 "uacute" => "\xFA", # small u, acute accent
468 "Ucirc" => "\xDB", # capital U, circumflex accent
469 "ucirc" => "\xFB", # small u, circumflex accent
470 "Ugrave" => "\xD9", # capital U, grave accent
471 "ugrave" => "\xF9", # small u, grave accent
472 "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
473 "uuml" => "\xFC", # small u, dieresis or umlaut mark
474 "Yacute" => "\xDD", # capital Y, acute accent
475 "yacute" => "\xFD", # small y, acute accent
476 "yuml" => "\xFF", # small y, dieresis or umlaut mark
477
478 "lchevron" => "\xAB", # left chevron (double less than)
479 "rchevron" => "\xBB", # right chevron (double greater than)
480);
481}
482
4831;