Patch for older compilers which had namespace confusion.
[p5sagit/p5-mst-13.2.git] / pod / pod2text.PL
CommitLineData
cb1a09d0 1#!/usr/local/bin/perl
2
3$SCREEN = ($ARGV[0] =~ /^-(\d+)/ && (shift, $1))
4 || ($ENV{TERMCAP} =~ /co#(\d+)/)[0]
5 || $ENV{COLUMNS}
6 || (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0]
7 || 72;
8
9$/ = "";
10
11$FANCY = 0;
12
13$cutting = 1;
14$DEF_INDENT = 4;
15$indent = $DEF_INDENT;
16$needspace = 0;
17
18POD_DIRECTIVE: while (<>) {
19 if ($cutting) {
20 next unless /^=/;
21 $cutting = 0;
22 }
23 1 while s{^(.*?)(\t+)(.*)$}{
24 $1
25 . (' ' x (length($2) * 8 - length($1) % 8))
26 . $3
27 }me;
28 # Translate verbatim paragraph
29 if (/^\s/) {
30 $needspace = 1;
31 output($_);
32 next;
33 }
34
35sub prepare_for_output {
36
37 s/\s*$/\n/;
38 &init_noremap;
39
40 # need to hide E<> first; they're processed in clear_noremap
41 s/(E<[^<>]+>)/noremap($1)/ge;
42 $maxnest = 10;
43 while ($maxnest-- && /[A-Z]</) {
44 unless ($FANCY) {
45 s/C<(.*?)>/`$1'/g;
46 } else {
47 s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/ge;
48 }
49 # s/[IF]<(.*?)>/italic($1)/ge;
50 s/I<(.*?)>/*$1*/g;
51 # s/[CB]<(.*?)>/bold($1)/ge;
52 s/X<.*?>//g;
53 # LREF: a manpage(3f)
54 s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g;
55 # LREF: an =item on another manpage
56 s{
57 L<
58 ([^/]+)
59 /
60 (
61 [:\w]+
62 (\(\))?
63 )
64 >
65 } {the "$2" entry in the $1 manpage}gx;
66
67 # LREF: an =item on this manpage
68 s{
69 ((?:
70 L<
71 /
72 (
73 [:\w]+
74 (\(\))?
75 )
76 >
77 (,?\s+(and\s+)?)?
78 )+)
79 } { internal_lrefs($1) }gex;
80
81 # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
82 # the "func" can disambiguate
83 s{
84 L<
85 (?:
86 ([a-zA-Z]\S+?) /
87 )?
88 "?(.*?)"?
89 >
90 }{
91 do {
92 $1 # if no $1, assume it means on this page.
93 ? "the section on \"$2\" in the $1 manpage"
94 : "the section on \"$2\""
95 }
96 }gex;
97
98 s/[A-Z]<(.*?)>/$1/g;
99 }
100 clear_noremap(1);
101}
102
103 &prepare_for_output;
104
105 if (s/^=//) {
106 # $needspace = 0; # Assume this.
107 # s/\n/ /g;
108 ($Cmd, $_) = split(' ', $_, 2);
109 # clear_noremap(1);
110 if ($Cmd eq 'cut') {
111 $cutting = 1;
112 }
113 elsif ($Cmd eq 'head1') {
114 makespace();
115 print;
116 #print uc($_);
117 }
118 elsif ($Cmd eq 'head2') {
119 makespace();
120 # s/(\w+)/\u\L$1/g;
121 #print ' ' x $DEF_INDENT, $_;
122 # print "\xA7";
123 s/(\w)/\xA7 $1/ if $FANCY;
124 print ' ' x ($DEF_INDENT/2), $_, "\n";
125 }
126 elsif ($Cmd eq 'over') {
127 push(@indent,$indent);
128 $indent += ($_ + 0) || $DEF_INDENT;
129 }
130 elsif ($Cmd eq 'back') {
131 $indent = pop(@indent);
132 warn "Unmatched =back\n" unless defined $indent;
133 $needspace = 1;
134 }
135 elsif ($Cmd eq 'item') {
136 makespace();
137 # s/\A(\s*)\*/$1\xb7/ if $FANCY;
138 # s/^(\s*\*\s+)/$1 /;
139 {
140 if (length() + 3 < $indent) {
141 my $paratag = $_;
142 $_ = <>;
143 if (/^=/) { # tricked!
144 local($indent) = $indent[$#index - 1] || $DEF_INDENT;
145 output($paratag);
146 redo POD_DIRECTIVE;
147 }
148 &prepare_for_output;
149 IP_output($paratag, $_);
150 } else {
151 local($indent) = $indent[$#index - 1] || $DEF_INDENT;
152 output($_);
153 }
154 }
155 }
156 else {
157 warn "Unrecognized directive: $Cmd\n";
158 }
159 }
160 else {
161 # clear_noremap(1);
162 makespace();
163 output($_, 1);
164 }
165}
166
167#########################################################################
168
169sub makespace {
170 if ($needspace) {
171 print "\n";
172 $needspace = 0;
173 }
174}
175
176sub bold {
177 my $line = shift;
178 $line =~ s/(.)/$1\b$1/g;
179 return $line;
180}
181
182sub italic {
183 my $line = shift;
184 $line =~ s/(.)/_\b$1/g;
185 return $line;
186}
187
188sub IP_output {
189 local($tag, $_) = @_;
190 local($tag_indent) = $indent[$#index - 1] || $DEF_INDENT;
191 $tag_cols = $SCREEN - $tag_indent;
192 $cols = $SCREEN - $indent;
193 $tag =~ s/\s*$//;
194 s/\s+/ /g;
195 s/^ //;
196 $str = "format STDOUT = \n"
197 . (" " x ($tag_indent))
198 . '@' . ('<' x ($indent - $tag_indent - 1))
199 . "^" . ("<" x ($cols - 1)) . "\n"
200 . '$tag, $_'
201 . "\n~~"
202 . (" " x ($indent-2))
203 . "^" . ("<" x ($cols - 5)) . "\n"
204 . '$_' . "\n\n.\n1";
205 #warn $str; warn "tag is $tag, _ is $_";
206 eval $str || die;
207 write;
208}
209
210sub output {
211 local($_, $reformat) = @_;
212 if ($reformat) {
213 $cols = $SCREEN - $indent;
214 s/\s+/ /g;
215 s/^ //;
216 $str = "format STDOUT = \n~~"
217 . (" " x ($indent-2))
218 . "^" . ("<" x ($cols - 5)) . "\n"
219 . '$_' . "\n\n.\n1";
220 eval $str || die;
221 write;
222 } else {
223 s/^/' ' x $indent/gem;
224 s/^\s+\n$/\n/gm;
225 print;
226 }
227}
228
229sub noremap {
230 local($thing_to_hide) = shift;
231 $thing_to_hide =~ tr/\000-\177/\200-\377/;
232 return $thing_to_hide;
233}
234
235sub init_noremap {
236 die "unmatched init" if $mapready++;
237 if ( /[\200-\377]/ ) {
238 warn "hit bit char in input stream";
239 }
240}
241
242sub clear_noremap {
243 my $ready_to_print = $_[0];
244 die "unmatched clear" unless $mapready--;
245 tr/\200-\377/\000-\177/;
246 # now for the E<>s, which have been hidden until now
247 # otherwise the interative \w<> processing would have
248 # been hosed by the E<gt>
249 s {
250 E<
251 ( [A-Za-z]+ )
252 >
253 } {
254 do {
255 defined $HTML_Escapes{$1}
256 ? do { $HTML_Escapes{$1} }
257 : do {
258 warn "Unknown escape: $& in $_";
259 "E<$1>";
260 }
261 }
262 }egx if $ready_to_print;
263}
264
265sub internal_lrefs {
266 local($_) = shift;
267 s{L</([^>]+)>}{$1}g;
268 my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
269 my $retstr = "the ";
270 my $i;
271 for ($i = 0; $i <= $#items; $i++) {
272 $retstr .= "C<$items[$i]>";
273 $retstr .= ", " if @items > 2 && $i != $#items;
274 $retstr .= " and " if $i+2 == @items;
275 }
276
277 $retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
278 . " elsewhere in this document ";
279
280 return $retstr;
281
282}
283
284BEGIN {
285
286%HTML_Escapes = (
287 'amp' => '&', # ampersand
288 'lt' => '<', # left chevron, less-than
289 'gt' => '>', # right chevron, greater-than
290 'quot' => '"', # double quote
291
292 "Aacute" => "\xC1", # capital A, acute accent
293 "aacute" => "\xE1", # small a, acute accent
294 "Acirc" => "\xC2", # capital A, circumflex accent
295 "acirc" => "\xE2", # small a, circumflex accent
296 "AElig" => "\xC6", # capital AE diphthong (ligature)
297 "aelig" => "\xE6", # small ae diphthong (ligature)
298 "Agrave" => "\xC0", # capital A, grave accent
299 "agrave" => "\xE0", # small a, grave accent
300 "Aring" => "\xC5", # capital A, ring
301 "aring" => "\xE5", # small a, ring
302 "Atilde" => "\xC3", # capital A, tilde
303 "atilde" => "\xE3", # small a, tilde
304 "Auml" => "\xC4", # capital A, dieresis or umlaut mark
305 "auml" => "\xE4", # small a, dieresis or umlaut mark
306 "Ccedil" => "\xC7", # capital C, cedilla
307 "ccedil" => "\xE7", # small c, cedilla
308 "Eacute" => "\xC9", # capital E, acute accent
309 "eacute" => "\xE9", # small e, acute accent
310 "Ecirc" => "\xCA", # capital E, circumflex accent
311 "ecirc" => "\xEA", # small e, circumflex accent
312 "Egrave" => "\xC8", # capital E, grave accent
313 "egrave" => "\xE8", # small e, grave accent
314 "ETH" => "\xD0", # capital Eth, Icelandic
315 "eth" => "\xF0", # small eth, Icelandic
316 "Euml" => "\xCB", # capital E, dieresis or umlaut mark
317 "euml" => "\xEB", # small e, dieresis or umlaut mark
318 "Iacute" => "\xCD", # capital I, acute accent
319 "iacute" => "\xED", # small i, acute accent
320 "Icirc" => "\xCE", # capital I, circumflex accent
321 "icirc" => "\xEE", # small i, circumflex accent
322 "Igrave" => "\xCD", # capital I, grave accent
323 "igrave" => "\xED", # small i, grave accent
324 "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
325 "iuml" => "\xEF", # small i, dieresis or umlaut mark
326 "Ntilde" => "\xD1", # capital N, tilde
327 "ntilde" => "\xF1", # small n, tilde
328 "Oacute" => "\xD3", # capital O, acute accent
329 "oacute" => "\xF3", # small o, acute accent
330 "Ocirc" => "\xD4", # capital O, circumflex accent
331 "ocirc" => "\xF4", # small o, circumflex accent
332 "Ograve" => "\xD2", # capital O, grave accent
333 "ograve" => "\xF2", # small o, grave accent
334 "Oslash" => "\xD8", # capital O, slash
335 "oslash" => "\xF8", # small o, slash
336 "Otilde" => "\xD5", # capital O, tilde
337 "otilde" => "\xF5", # small o, tilde
338 "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
339 "ouml" => "\xF6", # small o, dieresis or umlaut mark
340 "szlig" => "\xDF", # small sharp s, German (sz ligature)
341 "THORN" => "\xDE", # capital THORN, Icelandic
342 "thorn" => "\xFE", # small thorn, Icelandic
343 "Uacute" => "\xDA", # capital U, acute accent
344 "uacute" => "\xFA", # small u, acute accent
345 "Ucirc" => "\xDB", # capital U, circumflex accent
346 "ucirc" => "\xFB", # small u, circumflex accent
347 "Ugrave" => "\xD9", # capital U, grave accent
348 "ugrave" => "\xF9", # small u, grave accent
349 "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
350 "uuml" => "\xFC", # small u, dieresis or umlaut mark
351 "Yacute" => "\xDD", # capital Y, acute accent
352 "yacute" => "\xFD", # small y, acute accent
353 "yuml" => "\xFF", # small y, dieresis or umlaut mark
354
355 "lchevron" => "\xAB", # left chevron (double less than)
356 "rchevron" => "\xBB", # right chevron (double greater than)
357);
358}
359