Commit | Line | Data |
cb1a09d0 |
1 | #!/usr/local/bin/perl |
2 | |
3 | $SCREEN = ($ARGV[0] =~ /^-(\d+)/ && (shift, $1)) |
4 | || ($ENV{TERMCAP} =~ /co#(\d+)/)[0] |
5 | || $ENV{COLUMNS} |
6 | || (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0] |
7 | || 72; |
8 | |
9 | $/ = ""; |
10 | |
11 | $FANCY = 0; |
12 | |
13 | $cutting = 1; |
14 | $DEF_INDENT = 4; |
15 | $indent = $DEF_INDENT; |
16 | $needspace = 0; |
17 | |
18 | POD_DIRECTIVE: while (<>) { |
19 | if ($cutting) { |
20 | next unless /^=/; |
21 | $cutting = 0; |
22 | } |
23 | 1 while s{^(.*?)(\t+)(.*)$}{ |
24 | $1 |
25 | . (' ' x (length($2) * 8 - length($1) % 8)) |
26 | . $3 |
27 | }me; |
28 | # Translate verbatim paragraph |
29 | if (/^\s/) { |
30 | $needspace = 1; |
31 | output($_); |
32 | next; |
33 | } |
34 | |
35 | sub prepare_for_output { |
36 | |
37 | s/\s*$/\n/; |
38 | &init_noremap; |
39 | |
40 | # need to hide E<> first; they're processed in clear_noremap |
41 | s/(E<[^<>]+>)/noremap($1)/ge; |
42 | $maxnest = 10; |
43 | while ($maxnest-- && /[A-Z]</) { |
44 | unless ($FANCY) { |
45 | s/C<(.*?)>/`$1'/g; |
46 | } else { |
47 | s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/ge; |
48 | } |
49 | # s/[IF]<(.*?)>/italic($1)/ge; |
50 | s/I<(.*?)>/*$1*/g; |
51 | # s/[CB]<(.*?)>/bold($1)/ge; |
52 | s/X<.*?>//g; |
53 | # LREF: a manpage(3f) |
54 | s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g; |
55 | # LREF: an =item on another manpage |
56 | s{ |
57 | L< |
58 | ([^/]+) |
59 | / |
60 | ( |
61 | [:\w]+ |
62 | (\(\))? |
63 | ) |
64 | > |
65 | } {the "$2" entry in the $1 manpage}gx; |
66 | |
67 | # LREF: an =item on this manpage |
68 | s{ |
69 | ((?: |
70 | L< |
71 | / |
72 | ( |
73 | [:\w]+ |
74 | (\(\))? |
75 | ) |
76 | > |
77 | (,?\s+(and\s+)?)? |
78 | )+) |
79 | } { internal_lrefs($1) }gex; |
80 | |
81 | # LREF: a =head2 (head1?), maybe on a manpage, maybe right here |
82 | # the "func" can disambiguate |
83 | s{ |
84 | L< |
85 | (?: |
86 | ([a-zA-Z]\S+?) / |
87 | )? |
88 | "?(.*?)"? |
89 | > |
90 | }{ |
91 | do { |
92 | $1 # if no $1, assume it means on this page. |
93 | ? "the section on \"$2\" in the $1 manpage" |
94 | : "the section on \"$2\"" |
95 | } |
96 | }gex; |
97 | |
98 | s/[A-Z]<(.*?)>/$1/g; |
99 | } |
100 | clear_noremap(1); |
101 | } |
102 | |
103 | &prepare_for_output; |
104 | |
105 | if (s/^=//) { |
106 | # $needspace = 0; # Assume this. |
107 | # s/\n/ /g; |
108 | ($Cmd, $_) = split(' ', $_, 2); |
109 | # clear_noremap(1); |
110 | if ($Cmd eq 'cut') { |
111 | $cutting = 1; |
112 | } |
113 | elsif ($Cmd eq 'head1') { |
114 | makespace(); |
115 | print; |
116 | #print uc($_); |
117 | } |
118 | elsif ($Cmd eq 'head2') { |
119 | makespace(); |
120 | # s/(\w+)/\u\L$1/g; |
121 | #print ' ' x $DEF_INDENT, $_; |
122 | # print "\xA7"; |
123 | s/(\w)/\xA7 $1/ if $FANCY; |
124 | print ' ' x ($DEF_INDENT/2), $_, "\n"; |
125 | } |
126 | elsif ($Cmd eq 'over') { |
127 | push(@indent,$indent); |
128 | $indent += ($_ + 0) || $DEF_INDENT; |
129 | } |
130 | elsif ($Cmd eq 'back') { |
131 | $indent = pop(@indent); |
132 | warn "Unmatched =back\n" unless defined $indent; |
133 | $needspace = 1; |
134 | } |
135 | elsif ($Cmd eq 'item') { |
136 | makespace(); |
137 | # s/\A(\s*)\*/$1\xb7/ if $FANCY; |
138 | # s/^(\s*\*\s+)/$1 /; |
139 | { |
140 | if (length() + 3 < $indent) { |
141 | my $paratag = $_; |
142 | $_ = <>; |
143 | if (/^=/) { # tricked! |
144 | local($indent) = $indent[$#index - 1] || $DEF_INDENT; |
145 | output($paratag); |
146 | redo POD_DIRECTIVE; |
147 | } |
148 | &prepare_for_output; |
149 | IP_output($paratag, $_); |
150 | } else { |
151 | local($indent) = $indent[$#index - 1] || $DEF_INDENT; |
152 | output($_); |
153 | } |
154 | } |
155 | } |
156 | else { |
157 | warn "Unrecognized directive: $Cmd\n"; |
158 | } |
159 | } |
160 | else { |
161 | # clear_noremap(1); |
162 | makespace(); |
163 | output($_, 1); |
164 | } |
165 | } |
166 | |
167 | ######################################################################### |
168 | |
169 | sub makespace { |
170 | if ($needspace) { |
171 | print "\n"; |
172 | $needspace = 0; |
173 | } |
174 | } |
175 | |
176 | sub bold { |
177 | my $line = shift; |
178 | $line =~ s/(.)/$1\b$1/g; |
179 | return $line; |
180 | } |
181 | |
182 | sub italic { |
183 | my $line = shift; |
184 | $line =~ s/(.)/_\b$1/g; |
185 | return $line; |
186 | } |
187 | |
188 | sub IP_output { |
189 | local($tag, $_) = @_; |
190 | local($tag_indent) = $indent[$#index - 1] || $DEF_INDENT; |
191 | $tag_cols = $SCREEN - $tag_indent; |
192 | $cols = $SCREEN - $indent; |
193 | $tag =~ s/\s*$//; |
194 | s/\s+/ /g; |
195 | s/^ //; |
196 | $str = "format STDOUT = \n" |
197 | . (" " x ($tag_indent)) |
198 | . '@' . ('<' x ($indent - $tag_indent - 1)) |
199 | . "^" . ("<" x ($cols - 1)) . "\n" |
200 | . '$tag, $_' |
201 | . "\n~~" |
202 | . (" " x ($indent-2)) |
203 | . "^" . ("<" x ($cols - 5)) . "\n" |
204 | . '$_' . "\n\n.\n1"; |
205 | #warn $str; warn "tag is $tag, _ is $_"; |
206 | eval $str || die; |
207 | write; |
208 | } |
209 | |
210 | sub output { |
211 | local($_, $reformat) = @_; |
212 | if ($reformat) { |
213 | $cols = $SCREEN - $indent; |
214 | s/\s+/ /g; |
215 | s/^ //; |
216 | $str = "format STDOUT = \n~~" |
217 | . (" " x ($indent-2)) |
218 | . "^" . ("<" x ($cols - 5)) . "\n" |
219 | . '$_' . "\n\n.\n1"; |
220 | eval $str || die; |
221 | write; |
222 | } else { |
223 | s/^/' ' x $indent/gem; |
224 | s/^\s+\n$/\n/gm; |
225 | print; |
226 | } |
227 | } |
228 | |
229 | sub noremap { |
230 | local($thing_to_hide) = shift; |
231 | $thing_to_hide =~ tr/\000-\177/\200-\377/; |
232 | return $thing_to_hide; |
233 | } |
234 | |
235 | sub init_noremap { |
236 | die "unmatched init" if $mapready++; |
237 | if ( /[\200-\377]/ ) { |
238 | warn "hit bit char in input stream"; |
239 | } |
240 | } |
241 | |
242 | sub clear_noremap { |
243 | my $ready_to_print = $_[0]; |
244 | die "unmatched clear" unless $mapready--; |
245 | tr/\200-\377/\000-\177/; |
246 | # now for the E<>s, which have been hidden until now |
247 | # otherwise the interative \w<> processing would have |
248 | # been hosed by the E<gt> |
249 | s { |
250 | E< |
251 | ( [A-Za-z]+ ) |
252 | > |
253 | } { |
254 | do { |
255 | defined $HTML_Escapes{$1} |
256 | ? do { $HTML_Escapes{$1} } |
257 | : do { |
258 | warn "Unknown escape: $& in $_"; |
259 | "E<$1>"; |
260 | } |
261 | } |
262 | }egx if $ready_to_print; |
263 | } |
264 | |
265 | sub internal_lrefs { |
266 | local($_) = shift; |
267 | s{L</([^>]+)>}{$1}g; |
268 | my(@items) = split( /(?:,?\s+(?:and\s+)?)/ ); |
269 | my $retstr = "the "; |
270 | my $i; |
271 | for ($i = 0; $i <= $#items; $i++) { |
272 | $retstr .= "C<$items[$i]>"; |
273 | $retstr .= ", " if @items > 2 && $i != $#items; |
274 | $retstr .= " and " if $i+2 == @items; |
275 | } |
276 | |
277 | $retstr .= " entr" . ( @items > 1 ? "ies" : "y" ) |
278 | . " elsewhere in this document "; |
279 | |
280 | return $retstr; |
281 | |
282 | } |
283 | |
284 | BEGIN { |
285 | |
286 | %HTML_Escapes = ( |
287 | 'amp' => '&', # ampersand |
288 | 'lt' => '<', # left chevron, less-than |
289 | 'gt' => '>', # right chevron, greater-than |
290 | 'quot' => '"', # double quote |
291 | |
292 | "Aacute" => "\xC1", # capital A, acute accent |
293 | "aacute" => "\xE1", # small a, acute accent |
294 | "Acirc" => "\xC2", # capital A, circumflex accent |
295 | "acirc" => "\xE2", # small a, circumflex accent |
296 | "AElig" => "\xC6", # capital AE diphthong (ligature) |
297 | "aelig" => "\xE6", # small ae diphthong (ligature) |
298 | "Agrave" => "\xC0", # capital A, grave accent |
299 | "agrave" => "\xE0", # small a, grave accent |
300 | "Aring" => "\xC5", # capital A, ring |
301 | "aring" => "\xE5", # small a, ring |
302 | "Atilde" => "\xC3", # capital A, tilde |
303 | "atilde" => "\xE3", # small a, tilde |
304 | "Auml" => "\xC4", # capital A, dieresis or umlaut mark |
305 | "auml" => "\xE4", # small a, dieresis or umlaut mark |
306 | "Ccedil" => "\xC7", # capital C, cedilla |
307 | "ccedil" => "\xE7", # small c, cedilla |
308 | "Eacute" => "\xC9", # capital E, acute accent |
309 | "eacute" => "\xE9", # small e, acute accent |
310 | "Ecirc" => "\xCA", # capital E, circumflex accent |
311 | "ecirc" => "\xEA", # small e, circumflex accent |
312 | "Egrave" => "\xC8", # capital E, grave accent |
313 | "egrave" => "\xE8", # small e, grave accent |
314 | "ETH" => "\xD0", # capital Eth, Icelandic |
315 | "eth" => "\xF0", # small eth, Icelandic |
316 | "Euml" => "\xCB", # capital E, dieresis or umlaut mark |
317 | "euml" => "\xEB", # small e, dieresis or umlaut mark |
318 | "Iacute" => "\xCD", # capital I, acute accent |
319 | "iacute" => "\xED", # small i, acute accent |
320 | "Icirc" => "\xCE", # capital I, circumflex accent |
321 | "icirc" => "\xEE", # small i, circumflex accent |
322 | "Igrave" => "\xCD", # capital I, grave accent |
323 | "igrave" => "\xED", # small i, grave accent |
324 | "Iuml" => "\xCF", # capital I, dieresis or umlaut mark |
325 | "iuml" => "\xEF", # small i, dieresis or umlaut mark |
326 | "Ntilde" => "\xD1", # capital N, tilde |
327 | "ntilde" => "\xF1", # small n, tilde |
328 | "Oacute" => "\xD3", # capital O, acute accent |
329 | "oacute" => "\xF3", # small o, acute accent |
330 | "Ocirc" => "\xD4", # capital O, circumflex accent |
331 | "ocirc" => "\xF4", # small o, circumflex accent |
332 | "Ograve" => "\xD2", # capital O, grave accent |
333 | "ograve" => "\xF2", # small o, grave accent |
334 | "Oslash" => "\xD8", # capital O, slash |
335 | "oslash" => "\xF8", # small o, slash |
336 | "Otilde" => "\xD5", # capital O, tilde |
337 | "otilde" => "\xF5", # small o, tilde |
338 | "Ouml" => "\xD6", # capital O, dieresis or umlaut mark |
339 | "ouml" => "\xF6", # small o, dieresis or umlaut mark |
340 | "szlig" => "\xDF", # small sharp s, German (sz ligature) |
341 | "THORN" => "\xDE", # capital THORN, Icelandic |
342 | "thorn" => "\xFE", # small thorn, Icelandic |
343 | "Uacute" => "\xDA", # capital U, acute accent |
344 | "uacute" => "\xFA", # small u, acute accent |
345 | "Ucirc" => "\xDB", # capital U, circumflex accent |
346 | "ucirc" => "\xFB", # small u, circumflex accent |
347 | "Ugrave" => "\xD9", # capital U, grave accent |
348 | "ugrave" => "\xF9", # small u, grave accent |
349 | "Uuml" => "\xDC", # capital U, dieresis or umlaut mark |
350 | "uuml" => "\xFC", # small u, dieresis or umlaut mark |
351 | "Yacute" => "\xDD", # capital Y, acute accent |
352 | "yacute" => "\xFD", # small y, acute accent |
353 | "yuml" => "\xFF", # small y, dieresis or umlaut mark |
354 | |
355 | "lchevron" => "\xAB", # left chevron (double less than) |
356 | "rchevron" => "\xBB", # right chevron (double greater than) |
357 | ); |
358 | } |
359 | |