Debugger patch
[p5sagit/p5-mst-13.2.git] / lib / Pod / Text.pm
CommitLineData
69e00e79 1package Pod::Text;
2
55497cff 3# Version 1.02
69e00e79 4
5=head1 NAME
6
7Pod::Text - convert POD data to formatted ASCII text
8
9=head1 SYNOPSIS
10
11 use Pod::Text;
12
13 pod2text("perlfunc.pod");
14
15Also:
16
17 pod2text < input.pod
18
19=head1 DESCRIPTION
20
21Pod::Text is a module that can convert documentation in the POD format (such
22as can be found throughout the Perl distribution) into formatted ASCII.
23Termcap is optionally supported for boldface/underline, and can enabled via
24C<$Pod::Text::termcap=1>. If termcap has not been enabled, then backspaces
25will be used to simulate bold and underlined text.
26
27A separate F<pod2text> program is included that is primarily a wrapper for
28Pod::Text.
29
30The single function C<pod2text()> can take one or two arguments. The first
1fef88e7 31should be the name of a file to read the pod from, or "E<lt>&STDIN" to read from
69e00e79 32STDIN. A second argument, if provided, should be a filehandle glob where
33output should be sent.
34
35=head1 AUTHOR
36
1fef88e7 37Tom Christiansen E<lt>F<tchrist@mox.perl.com>E<gt>
69e00e79 38
39=head1 TODO
40
41Cleanup work. The input and output locations need to be more flexible,
42termcap shouldn't be a global variable, and the terminal speed needs to
43be properly calculated.
44
45=cut
46
47use Term::Cap;
48require Exporter;
49@ISA = Exporter;
50@EXPORT = qw(pod2text);
51
52$termcap=0;
53
54#$use_format=1;
55
56$UNDL = "\x1b[4m";
57$INV = "\x1b[7m";
58$BOLD = "\x1b[1m";
59$NORM = "\x1b[0m";
60
61sub pod2text {
62local($file,*OUTPUT) = @_;
63*OUTPUT = *STDOUT if @_<2;
64
65if($termcap and !$setuptermcap) {
66 $setuptermcap=1;
67
68 my($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
69 $UNDL = $term->{'_us'};
70 $INV = $term->{'_mr'};
71 $BOLD = $term->{'_md'};
72 $NORM = $term->{'_me'};
73}
74
75$SCREEN = ($_[0] =~ /^-(\d+)/ && (shift, $1))
69e00e79 76 || $ENV{COLUMNS}
36477c24 77 || ($ENV{TERMCAP} =~ /co#(\d+)/)[0]
69e00e79 78 || (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0]
79 || 72;
80
81$/ = "";
82
83$FANCY = 0;
84
85$cutting = 1;
86$DEF_INDENT = 4;
87$indent = $DEF_INDENT;
88$needspace = 0;
8c634b6e 89$begun = "";
69e00e79 90
2e917f57 91open(IN, $file) || die "Couldn't open $file: $!";
69e00e79 92
93POD_DIRECTIVE: while (<IN>) {
94 if ($cutting) {
95 next unless /^=/;
96 $cutting = 0;
97 }
8c634b6e 98 if ($begun) {
99 if (/^=end\s+$begun/) {
100 $begun = "";
101 }
102 elsif ($begun eq "text") {
103 print STDOUT $_;
104 }
105 next;
106 }
69e00e79 107 1 while s{^(.*?)(\t+)(.*)$}{
108 $1
109 . (' ' x (length($2) * 8 - length($1) % 8))
110 . $3
111 }me;
112 # Translate verbatim paragraph
113 if (/^\s/) {
114 $needspace = 1;
115 output($_);
116 next;
117 }
118
8c634b6e 119 if (/^=for\s+(\S+)\s*/s) {
120 if ($1 eq "text") {
121 print STDOUT $',"";
122 } else {
123 # ignore unknown for
124 }
125 next;
126 }
127 elsif (/^=begin\s+(\S+)\s*/s) {
128 $begun = $1;
129 if ($1 eq "text") {
130 print STDOUT $'."";
131 }
132 next;
133 }
134
69e00e79 135sub prepare_for_output {
136
137 s/\s*$/\n/;
138 &init_noremap;
139
140 # need to hide E<> first; they're processed in clear_noremap
141 s/(E<[^<>]+>)/noremap($1)/ge;
142 $maxnest = 10;
143 while ($maxnest-- && /[A-Z]</) {
144 unless ($FANCY) {
55497cff 145 s/C<(.*?)>/`$1'/sg;
69e00e79 146 } else {
55497cff 147 s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/sge;
69e00e79 148 }
149 # s/[IF]<(.*?)>/italic($1)/ge;
55497cff 150 s/I<(.*?)>/*$1*/sg;
69e00e79 151 # s/[CB]<(.*?)>/bold($1)/ge;
55497cff 152 s/X<.*?>//sg;
69e00e79 153 # LREF: a manpage(3f)
154 s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g;
155 # LREF: an =item on another manpage
156 s{
157 L<
158 ([^/]+)
159 /
160 (
161 [:\w]+
162 (\(\))?
163 )
164 >
165 } {the "$2" entry in the $1 manpage}gx;
166
167 # LREF: an =item on this manpage
168 s{
169 ((?:
170 L<
171 /
172 (
173 [:\w]+
174 (\(\))?
175 )
176 >
177 (,?\s+(and\s+)?)?
178 )+)
179 } { internal_lrefs($1) }gex;
180
181 # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
182 # the "func" can disambiguate
183 s{
184 L<
185 (?:
186 ([a-zA-Z]\S+?) /
187 )?
188 "?(.*?)"?
189 >
190 }{
191 do {
192 $1 # if no $1, assume it means on this page.
193 ? "the section on \"$2\" in the $1 manpage"
194 : "the section on \"$2\""
195 }
55497cff 196 }sgex;
69e00e79 197
55497cff 198 s/[A-Z]<(.*?)>/$1/sg;
69e00e79 199 }
200 clear_noremap(1);
201}
202
203 &prepare_for_output;
204
205 if (s/^=//) {
206 # $needspace = 0; # Assume this.
207 # s/\n/ /g;
208 ($Cmd, $_) = split(' ', $_, 2);
209 # clear_noremap(1);
210 if ($Cmd eq 'cut') {
211 $cutting = 1;
212 }
213 elsif ($Cmd eq 'head1') {
214 makespace();
215 print OUTPUT;
216 # print OUTPUT uc($_);
217 }
218 elsif ($Cmd eq 'head2') {
219 makespace();
220 # s/(\w+)/\u\L$1/g;
221 #print ' ' x $DEF_INDENT, $_;
222 # print "\xA7";
223 s/(\w)/\xA7 $1/ if $FANCY;
224 print OUTPUT ' ' x ($DEF_INDENT/2), $_, "\n";
225 }
226 elsif ($Cmd eq 'over') {
227 push(@indent,$indent);
228 $indent += ($_ + 0) || $DEF_INDENT;
229 }
230 elsif ($Cmd eq 'back') {
231 $indent = pop(@indent);
232 warn "Unmatched =back\n" unless defined $indent;
233 $needspace = 1;
234 }
235 elsif ($Cmd eq 'item') {
236 makespace();
237 # s/\A(\s*)\*/$1\xb7/ if $FANCY;
238 # s/^(\s*\*\s+)/$1 /;
239 {
240 if (length() + 3 < $indent) {
241 my $paratag = $_;
2e917f57 242 $_ = <IN>;
69e00e79 243 if (/^=/) { # tricked!
244 local($indent) = $indent[$#index - 1] || $DEF_INDENT;
245 output($paratag);
246 redo POD_DIRECTIVE;
247 }
248 &prepare_for_output;
249 IP_output($paratag, $_);
250 } else {
251 local($indent) = $indent[$#index - 1] || $DEF_INDENT;
252 output($_);
253 }
254 }
255 }
256 else {
257 warn "Unrecognized directive: $Cmd\n";
258 }
259 }
260 else {
261 # clear_noremap(1);
262 makespace();
263 output($_, 1);
264 }
265}
266
267close(IN);
268
269}
270
271#########################################################################
272
273sub makespace {
274 if ($needspace) {
275 print OUTPUT "\n";
276 $needspace = 0;
277 }
278}
279
280sub bold {
281 my $line = shift;
282 return $line if $use_format;
283 if($termcap) {
284 $line = "$BOLD$line$NORM";
285 } else {
286 $line =~ s/(.)/$1\b$1/g;
287 }
288# $line = "$BOLD$line$NORM" if $ansify;
289 return $line;
290}
291
292sub italic {
293 my $line = shift;
294 return $line if $use_format;
295 if($termcap) {
296 $line = "$UNDL$line$NORM";
297 } else {
298 $line =~ s/(.)/$1\b_/g;
299 }
300# $line = "$UNDL$line$NORM" if $ansify;
301 return $line;
302}
303
304# Fill a paragraph including underlined and overstricken chars.
305# It's not perfect for words longer than the margin, and it's probably
306# slow, but it works.
307sub fill {
308 local $_ = shift;
309 my $par = "";
310 my $indent_space = " " x $indent;
311 my $marg = $SCREEN-$indent;
312 my $line = $indent_space;
313 my $line_length;
314 foreach (split) {
315 my $word_length = length;
316 $word_length -= 2 while /\010/g; # Subtract backspaces
317
318 if ($line_length + $word_length > $marg) {
319 $par .= $line . "\n";
320 $line= $indent_space . $_;
321 $line_length = $word_length;
322 }
323 else {
324 if ($line_length) {
325 $line_length++;
326 $line .= " ";
327 }
328 $line_length += $word_length;
329 $line .= $_;
330 }
331 }
332 $par .= "$line\n" if $line;
333 $par .= "\n";
334 return $par;
335}
336
337sub IP_output {
338 local($tag, $_) = @_;
339 local($tag_indent) = $indent[$#index - 1] || $DEF_INDENT;
340 $tag_cols = $SCREEN - $tag_indent;
341 $cols = $SCREEN - $indent;
342 $tag =~ s/\s*$//;
343 s/\s+/ /g;
344 s/^ //;
345 $str = "format OUTPUT = \n"
346 . (" " x ($tag_indent))
347 . '@' . ('<' x ($indent - $tag_indent - 1))
348 . "^" . ("<" x ($cols - 1)) . "\n"
349 . '$tag, $_'
350 . "\n~~"
351 . (" " x ($indent-2))
352 . "^" . ("<" x ($cols - 5)) . "\n"
353 . '$_' . "\n\n.\n1";
354 #warn $str; warn "tag is $tag, _ is $_";
355 eval $str || die;
356 write OUTPUT;
357}
358
359sub output {
360 local($_, $reformat) = @_;
361 if ($reformat) {
362 $cols = $SCREEN - $indent;
363 s/\s+/ /g;
364 s/^ //;
365 $str = "format OUTPUT = \n~~"
366 . (" " x ($indent-2))
367 . "^" . ("<" x ($cols - 5)) . "\n"
368 . '$_' . "\n\n.\n1";
369 eval $str || die;
370 write OUTPUT;
371 } else {
372 s/^/' ' x $indent/gem;
373 s/^\s+\n$/\n/gm;
374 print OUTPUT;
375 }
376}
377
378sub noremap {
379 local($thing_to_hide) = shift;
380 $thing_to_hide =~ tr/\000-\177/\200-\377/;
381 return $thing_to_hide;
382}
383
384sub init_noremap {
385 die "unmatched init" if $mapready++;
26fb054b 386 #mask off high bit characters in input stream
387 s/([\200-\377])/"E<".ord($1).">"/ge;
69e00e79 388}
389
390sub clear_noremap {
391 my $ready_to_print = $_[0];
392 die "unmatched clear" unless $mapready--;
393 tr/\200-\377/\000-\177/;
394 # now for the E<>s, which have been hidden until now
395 # otherwise the interative \w<> processing would have
396 # been hosed by the E<gt>
397 s {
26fb054b 398 E<
399 (
400 ( \d+ )
401 | ( [A-Za-z]+ )
402 )
69e00e79 403 >
404 } {
405 do {
26fb054b 406 defined $2
407 ? chr($2)
408 :
409 defined $HTML_Escapes{$3}
410 ? do { $HTML_Escapes{$3} }
69e00e79 411 : do {
412 warn "Unknown escape: $& in $_";
413 "E<$1>";
414 }
415 }
416 }egx if $ready_to_print;
417}
418
419sub internal_lrefs {
420 local($_) = shift;
421 s{L</([^>]+)>}{$1}g;
422 my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
423 my $retstr = "the ";
424 my $i;
425 for ($i = 0; $i <= $#items; $i++) {
426 $retstr .= "C<$items[$i]>";
427 $retstr .= ", " if @items > 2 && $i != $#items;
428 $retstr .= " and " if $i+2 == @items;
429 }
430
431 $retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
432 . " elsewhere in this document ";
433
434 return $retstr;
435
436}
437
438BEGIN {
439
440%HTML_Escapes = (
441 'amp' => '&', # ampersand
442 'lt' => '<', # left chevron, less-than
443 'gt' => '>', # right chevron, greater-than
444 'quot' => '"', # double quote
445
446 "Aacute" => "\xC1", # capital A, acute accent
447 "aacute" => "\xE1", # small a, acute accent
448 "Acirc" => "\xC2", # capital A, circumflex accent
449 "acirc" => "\xE2", # small a, circumflex accent
450 "AElig" => "\xC6", # capital AE diphthong (ligature)
451 "aelig" => "\xE6", # small ae diphthong (ligature)
452 "Agrave" => "\xC0", # capital A, grave accent
453 "agrave" => "\xE0", # small a, grave accent
454 "Aring" => "\xC5", # capital A, ring
455 "aring" => "\xE5", # small a, ring
456 "Atilde" => "\xC3", # capital A, tilde
457 "atilde" => "\xE3", # small a, tilde
458 "Auml" => "\xC4", # capital A, dieresis or umlaut mark
459 "auml" => "\xE4", # small a, dieresis or umlaut mark
460 "Ccedil" => "\xC7", # capital C, cedilla
461 "ccedil" => "\xE7", # small c, cedilla
462 "Eacute" => "\xC9", # capital E, acute accent
463 "eacute" => "\xE9", # small e, acute accent
464 "Ecirc" => "\xCA", # capital E, circumflex accent
465 "ecirc" => "\xEA", # small e, circumflex accent
466 "Egrave" => "\xC8", # capital E, grave accent
467 "egrave" => "\xE8", # small e, grave accent
468 "ETH" => "\xD0", # capital Eth, Icelandic
469 "eth" => "\xF0", # small eth, Icelandic
470 "Euml" => "\xCB", # capital E, dieresis or umlaut mark
471 "euml" => "\xEB", # small e, dieresis or umlaut mark
472 "Iacute" => "\xCD", # capital I, acute accent
473 "iacute" => "\xED", # small i, acute accent
474 "Icirc" => "\xCE", # capital I, circumflex accent
475 "icirc" => "\xEE", # small i, circumflex accent
476 "Igrave" => "\xCD", # capital I, grave accent
477 "igrave" => "\xED", # small i, grave accent
478 "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
479 "iuml" => "\xEF", # small i, dieresis or umlaut mark
480 "Ntilde" => "\xD1", # capital N, tilde
481 "ntilde" => "\xF1", # small n, tilde
482 "Oacute" => "\xD3", # capital O, acute accent
483 "oacute" => "\xF3", # small o, acute accent
484 "Ocirc" => "\xD4", # capital O, circumflex accent
485 "ocirc" => "\xF4", # small o, circumflex accent
486 "Ograve" => "\xD2", # capital O, grave accent
487 "ograve" => "\xF2", # small o, grave accent
488 "Oslash" => "\xD8", # capital O, slash
489 "oslash" => "\xF8", # small o, slash
490 "Otilde" => "\xD5", # capital O, tilde
491 "otilde" => "\xF5", # small o, tilde
492 "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
493 "ouml" => "\xF6", # small o, dieresis or umlaut mark
494 "szlig" => "\xDF", # small sharp s, German (sz ligature)
495 "THORN" => "\xDE", # capital THORN, Icelandic
496 "thorn" => "\xFE", # small thorn, Icelandic
497 "Uacute" => "\xDA", # capital U, acute accent
498 "uacute" => "\xFA", # small u, acute accent
499 "Ucirc" => "\xDB", # capital U, circumflex accent
500 "ucirc" => "\xFB", # small u, circumflex accent
501 "Ugrave" => "\xD9", # capital U, grave accent
502 "ugrave" => "\xF9", # small u, grave accent
503 "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
504 "uuml" => "\xFC", # small u, dieresis or umlaut mark
505 "Yacute" => "\xDD", # capital Y, acute accent
506 "yacute" => "\xFD", # small y, acute accent
507 "yuml" => "\xFF", # small y, dieresis or umlaut mark
508
509 "lchevron" => "\xAB", # left chevron (double less than)
510 "rchevron" => "\xBB", # right chevron (double greater than)
511);
512}
513
5141;