enable fork.t on windows
[p5sagit/p5-mst-13.2.git] / lib / Pod / Man.pm
CommitLineData
9741dab0 1# Pod::Man -- Convert POD data to formatted *roff input.
2e20e14f 2# $Id: Man.pm,v 0.8 1999/10/07 09:39:37 eagle Exp $
9741dab0 3#
4# Copyright 1999 by Russ Allbery <rra@stanford.edu>
5#
6# This program is free software; you can redistribute it and/or modify it
7# under the same terms as Perl itself.
8#
9# This module is intended to be a replacement for pod2man, and attempts to
10# match its output except for some specific circumstances where other
11# decisions seemed to produce better output. It uses Pod::Parser and is
12# designed to be very easy to subclass.
13
14############################################################################
15# Modules and declarations
16############################################################################
17
18package Pod::Man;
19
20require 5.004;
21
22use Carp qw(carp croak);
23use Pod::Parser ();
24
25use strict;
26use subs qw(makespace);
27use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION);
28
29@ISA = qw(Pod::Parser);
30
2e20e14f 31($VERSION = (split (' ', q$Revision: 0.8 $ ))[1]) =~ s/\.(\d)$/.0$1/;
9741dab0 32
33
34############################################################################
35# Preamble and *roff output tables
36############################################################################
37
# The following is the static preamble which starts all *roff output we
# generate.  It's completely static except for the font to use as a
# fixed-width font, which is designated by @CFONT@.  $PREAMBLE should
# therefore be run through s/\@CFONT\@/<font>/g before output.
42$PREAMBLE = <<'----END OF PREAMBLE----';
43.de Sh \" Subsection heading
44.br
45.if t .Sp
46.ne 5
47.PP
48\fB\\$1\fR
49.PP
50..
51.de Sp \" Vertical space (when we can't use .PP)
52.if t .sp .5v
53.if n .sp
54..
55.de Ip \" List item
56.br
57.ie \\n(.$>=3 .ne \\$3
58.el .ne 3
59.IP "\\$1" \\$2
60..
61.de Vb \" Begin verbatim text
62.ft @CFONT@
63.nf
64.ne \\$1
65..
66.de Ve \" End verbatim text
67.ft R
68
69.fi
70..
71.\" Set up some character translations and predefined strings. \*(-- will
72.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
73.\" double quote, and \*(R" will give a right double quote. | will give a
74.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used
75.\" to do unbreakable dashes and therefore won't be available. \*(C` and
76.\" \*(C' expand to `' in nroff, nothing in troff, for use with C<>
77.tr \(*W-|\(bv\*(Tr
78.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
79.ie n \{\
80. ds -- \(*W-
81. ds PI pi
82. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
83. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
84. ds L" ""
85. ds R" ""
86. ds C` `
87. ds C' '
88'br\}
89.el\{\
90. ds -- \|\(em\|
91. ds PI \(*p
92. ds L" ``
93. ds R" ''
94'br\}
95.\"
96.\" If the F register is turned on, we'll generate index entries on stderr
97.\" for titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and
98.\" index entries marked with X<> in POD. Of course, you'll have to process
99.\" the output yourself in some meaningful fashion.
100.if \nF \{\
101. de IX
102. tm Index:\\$1\t\\n%\t"\\$2"
103. .
104. nr % 0
105. rr F
106.\}
107.\"
108.\" For nroff, turn off justification. Always turn off hyphenation; it
109.\" makes way too many mistakes in technical documents.
110.hy 0
111.if n .na
112.\"
113.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
114.\" Fear. Run. Save yourself. No user-serviceable parts.
115.bd B 3
116. \" fudge factors for nroff and troff
117.if n \{\
118. ds #H 0
119. ds #V .8m
120. ds #F .3m
121. ds #[ \f1
122. ds #] \fP
123.\}
124.if t \{\
125. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
126. ds #V .6m
127. ds #F 0
128. ds #[ \&
129. ds #] \&
130.\}
131. \" simple accents for nroff and troff
132.if n \{\
133. ds ' \&
134. ds ` \&
135. ds ^ \&
136. ds , \&
137. ds ~ ~
138. ds /
139.\}
140.if t \{\
141. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
142. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
143. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
144. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
145. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
146. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
147.\}
148. \" troff and (daisy-wheel) nroff accents
149.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
150.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
151.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
152.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
153.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
154.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
155.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
156.ds ae a\h'-(\w'a'u*4/10)'e
157.ds Ae A\h'-(\w'A'u*4/10)'E
158. \" corrections for vroff
159.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
160.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
161. \" for low resolution devices (crt and lpr)
162.if \n(.H>23 .if \n(.V>19 \
163\{\
164. ds : e
165. ds 8 ss
166. ds o a
167. ds d- d\h'-1'\(ga
168. ds D- D\h'-1'\(hy
169. ds th \o'bp'
170. ds Th \o'LP'
171. ds ae ae
172. ds Ae AE
173.\}
174.rm #[ #] #H #V #F C
175----END OF PREAMBLE----
176
# This table is taken nearly verbatim from Tom Christiansen's pod2man.  It
# maps the name used inside an E<> escape to the *roff text that renders it,
# and assumes that the standard preamble has already been printed, since
# that's what defines all of the accent mark strings (\*', \*`, \*(Ae, ...).
# Some of these are quoted with double quotes since they contain embedded
# single quotes, so \\ is used uniformly for backslash for readability.
%ESCAPES = (
    'amp'    => '&',          # ampersand
    'lt'     => '<',          # left chevron, less-than
    'gt'     => '>',          # right chevron, greater-than
    'quot'   => '"',          # double quote

    'Aacute' => "A\\*'",      # capital A, acute accent
    'aacute' => "a\\*'",      # small a, acute accent
    'Acirc'  => 'A\\*^',      # capital A, circumflex accent
    'acirc'  => 'a\\*^',      # small a, circumflex accent
    'AElig'  => '\\*(Ae',     # capital AE diphthong (preamble string is "Ae")
    'aelig'  => '\\*(ae',     # small ae diphthong (ligature)
    'Agrave' => "A\\*`",      # capital A, grave accent
    'agrave' => "a\\*`",      # small a, grave accent
    'Aring'  => 'A\\*o',      # capital A, ring
    'aring'  => 'a\\*o',      # small a, ring
    'Atilde' => 'A\\*~',      # capital A, tilde
    'atilde' => 'a\\*~',      # small a, tilde
    'Auml'   => 'A\\*:',      # capital A, dieresis or umlaut mark
    'auml'   => 'a\\*:',      # small a, dieresis or umlaut mark
    'Ccedil' => 'C\\*,',      # capital C, cedilla
    'ccedil' => 'c\\*,',      # small c, cedilla
    'Eacute' => "E\\*'",      # capital E, acute accent
    'eacute' => "e\\*'",      # small e, acute accent
    'Ecirc'  => 'E\\*^',      # capital E, circumflex accent
    'ecirc'  => 'e\\*^',      # small e, circumflex accent
    'Egrave' => 'E\\*`',      # capital E, grave accent
    'egrave' => 'e\\*`',      # small e, grave accent
    'ETH'    => '\\*(D-',     # capital Eth, Icelandic
    'eth'    => '\\*(d-',     # small eth, Icelandic
    'Euml'   => 'E\\*:',      # capital E, dieresis or umlaut mark
    'euml'   => 'e\\*:',      # small e, dieresis or umlaut mark
    'Iacute' => "I\\*'",      # capital I, acute accent
    'iacute' => "i\\*'",      # small i, acute accent
    'Icirc'  => 'I\\*^',      # capital I, circumflex accent
    'icirc'  => 'i\\*^',      # small i, circumflex accent
    'Igrave' => 'I\\*`',      # capital I, grave accent
    'igrave' => 'i\\*`',      # small i, grave accent
    'Iuml'   => 'I\\*:',      # capital I, dieresis or umlaut mark
    'iuml'   => 'i\\*:',      # small i, dieresis or umlaut mark
    'Ntilde' => 'N\\*~',      # capital N, tilde
    'ntilde' => 'n\\*~',      # small n, tilde
    'Oacute' => "O\\*'",      # capital O, acute accent
    'oacute' => "o\\*'",      # small o, acute accent
    'Ocirc'  => 'O\\*^',      # capital O, circumflex accent
    'ocirc'  => 'o\\*^',      # small o, circumflex accent
    'Ograve' => 'O\\*`',      # capital O, grave accent
    'ograve' => 'o\\*`',      # small o, grave accent
    'Oslash' => 'O\\*/',      # capital O, slash
    'oslash' => 'o\\*/',      # small o, slash
    'Otilde' => 'O\\*~',      # capital O, tilde
    'otilde' => 'o\\*~',      # small o, tilde
    'Ouml'   => 'O\\*:',      # capital O, dieresis or umlaut mark
    'ouml'   => 'o\\*:',      # small o, dieresis or umlaut mark
    'szlig'  => '\\*8',       # small sharp s, German (sz ligature)
    'THORN'  => '\\*(Th',     # capital THORN, Icelandic
    'thorn'  => '\\*(th',     # small thorn, Icelandic
    'Uacute' => "U\\*'",      # capital U, acute accent
    'uacute' => "u\\*'",      # small u, acute accent
    'Ucirc'  => 'U\\*^',      # capital U, circumflex accent
    'ucirc'  => 'u\\*^',      # small u, circumflex accent
    'Ugrave' => 'U\\*`',      # capital U, grave accent
    'ugrave' => 'u\\*`',      # small u, grave accent
    'Uuml'   => 'U\\*:',      # capital U, dieresis or umlaut mark
    'uuml'   => 'u\\*:',      # small u, dieresis or umlaut mark
    'Yacute' => "Y\\*'",      # capital Y, acute accent
    'yacute' => "y\\*'",      # small y, acute accent
    'yuml'   => 'y\\*:',      # small y, dieresis or umlaut mark
);
251
252
253############################################################################
254# Static helper functions
255############################################################################
256
# Guard text against *roff interpreting a line as a request: any line that
# begins with a period or a single quote gets a zero-width \& put in front of
# it.  Returns the protected copy; the argument is not modified.
sub protect {
    my $text = shift;
    $text =~ s/^([.\'])/\\&$1/mg;
    return $text;
}
259
# Build a *roff request line for a command whose single argument may contain
# double quotes.  Takes the command, the argument text, and an optional extra
# trailing argument.  \*(L" and \*(R" are first turned back into plain double
# quotes.  Without any double quotes the result is just the command followed
# by the quoted argument.  With double quotes, an .if n/.el pair is produced:
# the nroff branch doubles the embedded quotes, the other branch remaps
# paired quotes to `` and ''.  When the extra argument is present any
# remaining quotes are doubled once more and the extra argument is appended
# after the quoted text.
sub switchquotes {
    my ($command, $text, $extra) = @_;
    my $suffix = $extra ? " $extra" : '';

    $text =~ s/\\\*\([LR]\"/\"/g;
    unless ($text =~ /\"/) {
        return $command . ' "' . $text . '"' . $suffix . "\n";
    }

    # nroff form: every double quote doubled.
    my $nroff = $text;
    $nroff =~ s/\"/\"\"/g;

    # troff form: paired (already doubled) quotes become `` and ''.
    my $troff = $nroff;
    $troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g;

    if ($extra) {
        $nroff =~ s/\"/\"\"/g;
        $troff =~ s/\"/\"\"/g;
    }

    my $if_nroff = ".if n $command " . '"' . $nroff . '"' . $suffix . "\n";
    my $otherwise = ".el $command " . '"' . $troff . '"' . $suffix . "\n";
    return $if_nroff . $otherwise;
}
286
# Turn a *roff font name into the escape that selects it: \fX for a
# one-character name, \f(XX for anything longer.
sub toescape {
    my $font = $_[0];
    my $prefix = (length ($font) > 1) ? '\f(' : '\f';
    return $prefix . $font;
}
289
290
291############################################################################
292# Initialization
293############################################################################
294
# Initialize the object.  Here, we also process any additional options
# passed to the constructor or set up defaults if none were given.  center
# is the centered title, release is the version number, and date is the date
# for the documentation.  Note that we can't know what file name we're
# processing due to the architecture of Pod::Parser, so that *has* to either
# be passed to the constructor or set separately with Pod::Man::name().
#
# Croaks if a user-supplied font name is not one or two characters long.
sub initialize {
    my $self = shift;

    # Figure out the fixed-width font.  If user-supplied, make sure that
    # they are the right length.
    for (qw/fixed fixedbold fixeditalic fixedbolditalic/) {
        if (defined $$self{$_}) {
            if (length ($$self{$_}) < 1 || length ($$self{$_}) > 2) {
                croak "roff font should be 1 or 2 chars, not `$$self{$_}'";
            }
        } else {
            $$self{$_} = '';
        }
    }

    # Set the default fonts.  We can't be sure what fixed bold-italic is
    # going to be called, so default to just bold.
    $$self{fixed}           ||= 'CW';
    $$self{fixedbold}       ||= 'CB';
    $$self{fixeditalic}     ||= 'CI';
    $$self{fixedbolditalic} ||= 'CB';

    # Set up a table of font escapes.  First number is fixed-width, second
    # is bold, third is italic.
    $$self{FONTS} = { '000' => '\fR', '001' => '\fI',
                      '010' => '\fB', '011' => '\f(BI',
                      '100' => toescape ($$self{fixed}),
                      '101' => toescape ($$self{fixeditalic}),
                      '110' => toescape ($$self{fixedbold}),
                      '111' => toescape ($$self{fixedbolditalic})};

    # Extra stuff for page titles.
    $$self{center} = 'User Contributed Perl Documentation'
        unless defined $$self{center};
    $$self{indent} = 4 unless defined $$self{indent};

    # We used to try first to get the version number from a local binary,
    # but we shouldn't need that any more.  Get the version from the running
    # Perl.
    if (!defined $$self{release}) {
        my ($version, $patch) = ($] =~ /^(.{5})(\d{2})?/);
        $$self{release} = "perl $version";
        $$self{release} .= ", patch $patch" if $patch;
    }

    # Double quotes in things that will be quoted.  The date is normally
    # still unset at this point (begin_pod fills it in later), so skip
    # anything undefined rather than substituting on undef, which warns.
    for (qw/center date release/) {
        $$self{$_} =~ s/\"/\"\"/g if defined $$self{$_};
    }

    $$self{INDENT}  = 0;        # Current indentation level.
    $$self{INDENTS} = [];       # Stack of indentations.
    $$self{INDEX}   = [];       # Index keys waiting to be printed.

    $self->SUPER::initialize;
}
355
# For each document we process, output the preamble first, followed by the
# .IX/.TH title lines.  The page name and section come from the name and
# section options when given and are otherwise guessed from the input file
# name; the date defaults to the modification time of the input file (or the
# current time if that can't be determined), formatted as YYYY-MM-DD.
#
# The fixed-width font is interpolated into a private copy of $PREAMBLE, so
# the global template is left intact and each parser object gets the font it
# asked for.
sub begin_pod {
    my $self = shift;

    # Try to figure out the name and section from the file name.
    my $section = $$self{section} || 1;
    my $name = $$self{name};
    if (!defined $name) {
        $name = $self->input_file;
        $section = 3 if (!$$self{section} && $name =~ /\.pm$/i);
        $name =~ s/\.p(od|[lm])$//i;
        if ($section =~ /^1/) {
            require File::Basename;
            $name = uc File::Basename::basename ($name);
        } else {
            # Lose everything up to the first of
            #     */lib/*perl*      standard or site_perl module
            #     */*perl*/lib      from -D prefix=/opt/perl
            #     */*perl*/         random module hierarchy
            # which works.  Should be fixed to use File::Spec.
            for ($name) {
                s%//+%/%g;
                if (     s%^.*?/lib/[^/]*perl[^/]*/%%i
                      or s%^.*?/[^/]*perl[^/]*/(?:lib/)?%%i) {
                    s%^site(_perl)?/%%;         # site and site_perl
                    s%^(.*-$^O|$^O-.*)/%%o;     # arch
                    s%^\d+\.\d+%%;              # version
                }
                s%/%::%g;
            }
        }
    }

    # Modification date header.  Try to use the modification time of our
    # input.  Zero-pad the month and day so the result is a proper ISO date.
    if (!defined $$self{date}) {
        my $time = (stat $self->input_file)[9] || time;
        my ($day, $month, $year) = (localtime $time)[3,4,5];
        $$self{date} = sprintf ('%4d-%02d-%02d',
                                $year + 1900, $month + 1, $day);
    }

    # Now, print out the preamble and the title.  Work on a copy so that the
    # @CFONT@ marker survives in $PREAMBLE for the next document or parser.
    my $preamble = $PREAMBLE;
    $preamble =~ s/\@CFONT\@/$$self{fixed}/g;
    chomp $preamble;
    print { $self->output_handle } <<"----END OF HEADER----";
.\\" Automatically generated by Pod::Man version $VERSION
.\\" @{[ scalar localtime ]}
.\\"
.\\" Standard preamble:
.\\" ======================================================================
$preamble
.\\" ======================================================================
.\\"
.IX Title "$name $section"
.TH $name $section "$$self{release}" "$$self{date}" "$$self{center}"
.UC
----END OF HEADER----
#"# for cperl-mode

    # Initialize a few per-file variables.
    $$self{INDENT} = 0;
    $$self{NEEDSPACE} = 0;
}
423
424
425############################################################################
426# Core overrides
427############################################################################
428
# Called for each command paragraph.  Gets the command, the associated
# paragraph, the line number, and a Pod::Paragraph object.  Just dispatches
# the command to a method named cmd_<command>, passing along the remaining
# arguments and returning whatever that method returns.  =pod is ignored, as
# is everything except =end while inside an excluded =begin block.  =cut is
# handled internally by Pod::Parser.
#
# A command with no corresponding cmd_ method produces a warning and is
# skipped instead of aborting the whole conversion with a method lookup
# failure.
sub command {
    my $self = shift;
    my $command = shift;
    return if $command eq 'pod';
    return if ($$self{EXCLUDE} && $command ne 'end');
    my $method = 'cmd_' . $command;
    unless ($self->can ($method)) {
        carp "Unknown command paragraph =$command";
        return;
    }
    $self->$method (@_);
}
441
# Called for a verbatim paragraph.  Gets the paragraph, the line number, and
# a Pod::Paragraph object.  Paragraphs consisting only of whitespace are
# dropped.  Otherwise trailing whitespace is trimmed to a single newline,
# tabs are expanded to eight-column stops, backslashes are rofficated, every
# non-blank line gets a leading zero-width \& to protect against commands,
# and the result is wrapped in .Vb <lines>/.Ve.
sub verbatim {
    my ($self, $text) = @_;
    return if $$self{EXCLUDE};
    return if $text =~ /^\s+$/;

    $text =~ s/\s+$/\n/;
    my $lines = ($text =~ tr/\n//);

    # Expand one run of tabs per pass until none are left.
    while ($text =~ s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me) {
    }

    $text =~ s/\\/\\e/g;
    $text =~ s/^(\s*\S)/'\&' . $1/gme;

    if ($$self{NEEDSPACE}) {
        $self->makespace;
    }
    $self->output (".Vb $lines\n$text.Ve\n");
    $$self{NEEDSPACE} = 0;
}
460
# Called for a regular text block.  Gets the paragraph, the line number, and
# a Pod::Paragraph object.  Inside an excluded =begin block nothing is done;
# inside a man/roff =begin block the paragraph is output untouched.
# Otherwise runs of L</item> links are collapsed into a single phrase, the
# text is parsed for interior sequences, and the result is output followed
# by any pending index entries.
sub textblock {
    my $self = shift;
    return if $$self{EXCLUDE};
    $self->output ($_[0]), return if $$self{VERBATIM};

    # Perform a little magic to collapse multiple L<> references.  We'll
    # just rewrite the whole thing into actual text at this part, bypassing
    # the whole internal sequence parsing thing.  The substitution has to be
    # bound to the paragraph itself; run unbound it would act on whatever
    # happened to be in the global $_ and never touch our text.
    my $text = shift;
    $text =~ s{
        (
          L<                    # A link of the form L</something>.
              /
              (?:
                  [:\w]+        # The item has to be a simple word...
                  (?: \(\) )?   # ...or simple function.
              )
          >
          (?:
              ,?\s+(?:and\s+)?  # Allow lots of them, conjuncted.
              L<
                  /
                  (?: [:\w]+ (?: \(\) )? )
              >
          )+
        )
    } {
        my $links = $1;
        $links =~ s%L</([^>]+)>%$1%g;
        my @items = split /,?\s+(?:and\s+)?/, $links;

        # The pattern guarantees at least two items.  Two are joined with
        # "and"; three or more get commas plus a final ", and".
        my $last = pop @items;
        my $list = (@items > 1)
            ? join (', ', @items) . ', and ' . $last
            : $items[0] . ' and ' . $last;
        "the $list entries elsewhere in this document";
    }gex;

    # Parse the tree and output it.  collapse knows about references to
    # scalars as well as scalars and does the right thing with them.
    $text = $self->parse ($text, @_);
    $text =~ s/\n\s*$/\n/;
    $self->makespace if $$self{NEEDSPACE};
    $self->output (protect ($self->mapfonts ($text)));
    $self->outindex;
    $$self{NEEDSPACE} = 1;
}
511
# Called for an interior sequence.  Takes a Pod::InteriorSequence object and
# returns a reference to a scalar blessed into Pod::Man::String (or an empty
# string when the sequence produces no output).  The scalar is the final
# formatted text; it's returned as a reference so that other interior
# sequences above us know that the text has already been processed.
#
# Unknown E<> escapes and unknown sequences produce a warning and are passed
# through literally.
sub sequence {
    my ($self, $seq) = @_;
    my $command = $seq->cmd_name;

    # Zero-width characters.
    if ($command eq 'Z') {
        my $v = '\&';
        return bless \ $v, 'Pod::Man::String';
    }

    # C<>, L<>, X<>, and E<> don't apply guesswork to their contents.
    local $_ = $self->collapse ($seq->parse_tree, $command =~ /^[CELX]$/);

    # Handle E<> escapes.  Numeric escapes are character codes; everything
    # else is looked up in %ESCAPES.
    if ($command eq 'E') {
        my $expansion;
        if (/^\d+$/) {
            $expansion = chr ($_);
        } elsif (exists $ESCAPES{$_}) {
            $expansion = "$ESCAPES{$_}";
        } else {
            # Report the escape name itself; no capture group is set here.
            carp "Unknown escape E<$_>";
            $expansion = "E<$_>";
        }
        return bless \ $expansion, 'Pod::Man::String';
    }

    # For all the other sequences, empty content produces no output.
    return '' if $_ eq '';

    # Handle formatting sequences.  These emit internal start/end font
    # markers that mapfonts() later turns into real font escapes.
    if ($command eq 'B') {
        return bless \ ('\f(BS' . $_ . '\f(BE'), 'Pod::Man::String';
    } elsif ($command eq 'F') {
        return bless \ ('\f(IS' . $_ . '\f(IE'), 'Pod::Man::String';
    } elsif ($command eq 'I') {
        return bless \ ('\f(IS' . $_ . '\f(IE'), 'Pod::Man::String';
    } elsif ($command eq 'C') {
        s/-/\\-/g;
        s/__/_\\|_/g;
        return bless \ ('\f(FS\*(C`' . $_ . "\\*(C'\\f(FE"),
            'Pod::Man::String';
    }

    # Handle links.
    if ($command eq 'L') {
        # XXX bug in lvalue subroutines prevents this from working
        #return bless \ ($self->buildlink ($_)), 'Pod::Man::String';
        my $v = $self->buildlink ($_);
        return bless \$v, 'Pod::Man::String';
    }

    # Whitespace protection replaces whitespace with "\ ".
    if ($command eq 'S') {
        s/\s+/\\ /g;
        return bless \ "$_", 'Pod::Man::String';
    }

    # Add an index entry to the list of ones waiting to be output.
    if ($command eq 'X') { push (@{ $$self{INDEX} }, $_); return '' }

    # Anything else is unknown.  Warn, and hand back the sequence literally
    # rather than letting the return value of carp leak into the output.
    carp "Unknown sequence $command<$_>";
    my $literal = "$command<$_>";
    return bless \ $literal, 'Pod::Man::String';
}
577
578
579############################################################################
580# Command paragraphs
581############################################################################
582
583# All command paragraphs take the paragraph and the line number.
584
# First level heading (command paragraphs take the paragraph and the line
# number).  We can't output .IX in the NAME section due to a bug in some
# versions of catman, so no index entry is emitted for that section.  .SH
# already uses small caps, so any \s size changes added by guesswork are
# stripped from the heading before it is output.
sub cmd_head1 {
    my $self = shift;
    my $heading = $self->parse (@_);
    $heading =~ s/\s+$//;
    $heading =~ s/\\s-?\d//g;
    $self->output (switchquotes ('.SH', $self->mapfonts ($heading)));
    my @entry = ($heading eq 'NAME') ? () : ('Header', $heading);
    $self->outindex (@entry);
    $$self{NEEDSPACE} = 0;
}
598
# Second level heading.  Output as a .Sh request with trailing whitespace
# removed, followed by a Subsection index entry.
sub cmd_head2 {
    my $self = shift;
    my $heading = $self->parse (@_);
    $heading =~ s/\s+$//;
    my $request = switchquotes ('.Sh', $self->mapfonts ($heading));
    $self->output ($request);
    $self->outindex ('Subsection', $heading);
    $$self{NEEDSPACE} = 0;
}
608
# Start a list.  The paragraph is the requested indent; anything that isn't
# a plain (optionally signed) number falls back to the default indent option.
# Trailing whitespace is optional, so an indent that arrives without its
# final newline is still honored.  For indents after the first, wrap the
# outside indent in .RS so that hanging paragraph tags will be correct.
sub cmd_over {
    my $self = shift;
    local $_ = shift;
    unless (/^[-+]?\d+\s*$/) { $_ = $$self{indent} }
    if (@{ $$self{INDENTS} } > 0) {
        $self->output (".RS $$self{INDENT}\n");
    }
    push (@{ $$self{INDENTS} }, $$self{INDENT});
    $$self{INDENT} = ($_ + 0);
}
621
# End a list.  Restores the indentation saved by the matching =over, warning
# about (and recovering from) a =back with nothing to match.  If we've closed
# an embedded indent, we've mangled the hanging paragraph indent, so
# temporarily replace it with .RS and set WEIRDINDENT.  We'll close that .RS
# at the next =back or =item.
sub cmd_back {
    my $self = shift;

    my $restored = pop @{ $$self{INDENTS} };
    $$self{INDENT} = $restored;
    if (!defined $restored) {
        carp "Unmatched =back";
        $$self{INDENT} = 0;
    }

    # Close any temporary .RS left over from an earlier =back.
    if ($$self{WEIRDINDENT}) {
        $self->output (".RE\n");
        $$self{WEIRDINDENT} = 0;
    }

    # Still inside an outer list: swap the hanging indent for a .RS.
    my $depth = @{ $$self{INDENTS} };
    if ($depth > 0) {
        $self->output (".RE\n");
        $self->output (".RS $$self{INDENT}\n");
        $$self{WEIRDINDENT} = 1;
    }

    $$self{NEEDSPACE} = 1;
}
643
# An individual list item.  Emit an index entry for anything that's
# interesting, but don't emit index entries for things like bullets and
# numbers.  A leading * is turned into a *roff bullet while we're at it (so
# for nice output, use * for your lists rather than o or . or - or some other
# thing).  Any temporary .RS left by a preceding =back is closed first.
sub cmd_item {
    my $self = shift;
    my $tag = $self->parse (@_);
    $tag =~ s/\s+$//;

    # Only tags containing a word character that aren't just "a." or "1)"
    # are worth indexing; strip a leading bullet character from the entry.
    my $index;
    if ($tag =~ /\w/ && $tag !~ /^\w[.\)]\s*$/) {
        $index = $tag;
        $index =~ s/^\s*[-*+o.]?\s*//;
    }

    $tag =~ s/^\*(\s|\Z)/\\\(bu$1/;

    if ($$self{WEIRDINDENT}) {
        $self->output (".RE\n");
        $$self{WEIRDINDENT} = 0;
    }

    $tag = $self->mapfonts ($tag);
    $self->output (switchquotes ('.Ip', $tag, $$self{INDENT}));
    my @entry = $index ? ('Item', $index) : ();
    $self->outindex (@entry);
    $$self{NEEDSPACE} = 0;
}
667
# Begin a block for a particular translator.  The first word of the paragraph
# names the format: man and roff blocks set VERBATIM, which triggers special
# handling in textblock(); any other format sets EXCLUDE.  A paragraph with
# no format word is ignored.
sub cmd_begin {
    my ($self, $text) = @_;
    return unless $text =~ /^(\S+)/;
    my $kind = $1;
    my $flag = ($kind eq 'man' || $kind eq 'roff') ? 'VERBATIM' : 'EXCLUDE';
    $$self{$flag} = 1;
}
680
# End a block for a particular translator by clearing both the EXCLUDE and
# VERBATIM flags.  We assume that all =begin/=end pairs are properly closed.
sub cmd_end {
    my $self = shift;
    for my $flag (qw(EXCLUDE VERBATIM)) {
        $$self{$flag} = 0;
    }
    return 0;
}
688
# One paragraph for a particular translator.  Ignore it unless it's intended
# for man or roff, in which case the format name is stripped and the rest of
# the paragraph is output verbatim.
sub cmd_for {
    my ($self, $text) = @_;
    return unless $text =~ s/^(?:man|roff)\b[ \t]*\n?//;
    $self->output ($text);
}
698
699
700############################################################################
701# Link handling
702############################################################################
703
# Handle links.  We can't actually make real hyperlinks, so this is all to
# figure out what text and formatting we print out.  Takes the contents of
# an L<> sequence and returns the text to print; an unusable link produces a
# warning and an empty string.
sub buildlink {
    my $self = shift;
    local $_ = shift;

    # Smash whitespace in case we were split across multiple lines.
    s/\s+/ /g;

    # If we were given any explicit text, just output it.
    if (m{ ^ ([^|]+) \| }x) { return $1 }

    # Okay, leading and trailing whitespace isn't important.
    s/^\s+//;
    s/\s+$//;

    # Default to using the whole content of the link entry as a section
    # name.  Note that L<manpage/> forces a manpage interpretation, as does
    # something looking like L<manpage(section)>.  Do the same thing to
    # L<manpage(section)> as we would to manpage(section) without the L<>;
    # see guesswork().  If we've added italics, don't add the "manpage"
    # text; markup is sufficient.
    my ($manpage, $section) = ('', $_);
    if (/^"\s*(.*?)\s*"$/) {
        $section = '"' . $1 . '"';
    } elsif (m{ ^ [-:.\w]+ (?: \( \S+ \) )? $ }x) {
        ($manpage, $section) = ($_, '');
        $manpage =~ s/^([^\(]+)\(/'\f(IS' . $1 . '\f(IE\|('/e;
    } elsif (m%/%) {
        ($manpage, $section) = split (/\s*\/\s*/, $_, 2);
        if ($manpage =~ /^[-:.\w]+(?:\(\S+\))?$/) {
            # The match consumes the opening parenthesis, so it has to be
            # put back, exactly as in the plain manpage(section) case above.
            $manpage =~ s/^([^\(]+)\(/'\f(IS' . $1 . '\f(IE\|('/e;
        }
        $section =~ s/^\"\s*//;
        $section =~ s/\s*\"$//;
    }
    if ($manpage && $manpage !~ /\\f\(IS/) {
        $manpage = "the $manpage manpage";
    }

    # Now build the actual output text.  Only something that is entirely a
    # simple word or function name counts as an entry; the pattern must be
    # anchored at the end or multi-word section titles would be called
    # entries and never reach the quoted "section on" form.
    my $text = '';
    if (!length ($section) && !length ($manpage)) {
        carp "Invalid link $_";
    } elsif (!length ($section)) {
        $text = $manpage;
    } elsif ($section =~ /^[:\w]+(?:\(\))?$/) {
        $text .= 'the ' . $section . ' entry';
        $text .= (length $manpage) ? " in $manpage"
                                   : " elsewhere in this document";
    } else {
        if ($section !~ /^".*"$/) { $section = '"' . $section . '"' }
        $text .= 'the section on ' . $section;
        $text .= " in $manpage" if length $manpage;
    }
    $text;
}
761
762
763############################################################################
764# Escaping and fontification
765############################################################################
766
# At this point, we'll have embedded font codes of the form \f(<font>[SE]
# where <font> is one of B, I, or F.  Turn those into the right font start
# or end codes.  B<someI<thing> else> should map to \fBsome\f(BIthing\fB
# else\fR.  The old pod2man didn't get this right; the second \fB was \fR,
# so nested sequences didn't work right.  We keep one nesting counter per
# font attribute, bump it up or down for every start or end code, and look
# up the escape for the resulting on/off combination in the FONTS table.
sub mapfonts {
    my ($self, $text) = @_;

    my ($fixed, $bold, $italic) = (0, 0, 0);
    my %counter = (F => \$fixed, B => \$bold, I => \$italic);
    $text =~ s { \\f\((.)(.) } {
        my $delta = ($2 eq 'S') ? 1 : -1;
        ${ $counter{$1} } += $delta;
        my $key = join ('', map { $_ ? 1 : 0 } $fixed, $bold, $italic);
        $$self{FONTS}{$key};
    }gxe;
    return $text;
}
787
788
789############################################################################
790# *roff-specific parsing
791############################################################################
792
# Called instead of parse_text; calls parse_text with the flags that route
# interior sequences through sequence() and parse trees through collapse(),
# passing any other arguments along unchanged.
sub parse {
    my ($self, @args) = @_;
    my %options = (-expand_seq   => 'sequence',
                   -expand_ptree => 'collapse');
    return $self->parse_text (\%options, @args);
}
799
# Takes a parse tree and a flag saying whether or not to treat it as literal
# text (not call guesswork on it), and returns the concatenation of all of
# the text strings in that parse tree.  References are already-processed
# text and are used as is.  Plain scalars only get their backslashes
# rofficated when the literal flag is set; otherwise guesswork() is called
# on them.  Assumes that everything in the parse tree is either a scalar or
# a reference to a scalar.
sub collapse {
    my ($self, $ptree, $literal) = @_;
    my @pieces;
    for my $child ($ptree->children) {
        if (ref $child) {
            push (@pieces, $$child);
        } elsif ($literal) {
            $child =~ s/\\/\\e/g;
            push (@pieces, $child);
        } else {
            push (@pieces, $self->guesswork ($child));
        }
    }
    return join ('', @pieces);
}
823
# Takes a text block to perform guesswork on; this is guaranteed not to
# contain any interior sequences.  Returns the text block with remapping
# done.  The steps are order-dependent: later patterns are written to cope
# with the escapes that earlier ones insert.
sub guesswork {
    my $self = shift;
    local $_ = shift;

    # rofficate backslashes.
    s/\\/\\e/g;

    # Ensure double underbars have a tiny space between them.
    s/__/_\\|_/g;

    # Make all caps a little smaller.  Be careful here, since we don't want
    # to make @ARGV into small caps, nor do we want to fix the MIME in
    # MIME-Version, since it looks weird with the full-height V.  Only two
    # groups capture here (the trailing context is a lookahead), so the
    # replacement is built from $1 and $2 alone.
    s{
        ( ^ | [\s\(\"\'\`\[\{<>] )
        ( [A-Z] [A-Z] [/A-Z+:\d_\$&-]* )
        (?: (?= [\s>\}\]\)\'\".?!,;:] | -- ) | $ )
    } { $1 . '\s-1' . $2 . '\s0' }egx;

    # Turn PI into a pretty pi.
    s{ (?: \\s-1 | \b ) PI (?: \\s0 | \b ) } {\\*\(PI}gx;

    # Italize functions in the form func().
    s{
        \b
        (
            [:\w]+ (?:\\s-1)? \(\)
        )
    } { '\f(IS' . $1 . '\f(IE' }egx;

    # func(n) is a reference to a manual page.  Make it \fIfunc\fR\|(n).
    s{
        \b
        (\w[-:.\w]+ (?:\\s-1)?)
        (
            \( [^\)] \)
        )
    } { '\f(IS' . $1 . '\f(IE\|' . $2 }egx;

    # Convert simple Perl variable references to a fixed-width font.
    s{
        ( \s+ )
        ( [\$\@%] [\w:]+ )
        (?! \( )
    } { $1 . '\f(FS' . $2 . '\f(FE'}egx;

    # Translate -- into a real em dash if it's used like one and fix up
    # dashes, but keep hyphens hyphens.  A single dash after a letter is
    # left alone, which also protects the \s-1 inserted above.
    s{ (\G|^|.) (-+) (\b|.) } {
        my ($pre, $dash, $post) = ($1, $2, $3);
        if (length ($dash) == 1) {
            ($pre =~ /[a-zA-Z]/) ? "$pre-$post" : "$pre\\-$post";
        } elsif (length ($dash) == 2
                 && ((!$pre && !$post)
                     || ($pre =~ /\w/ && !$post)
                     || ($pre eq ' ' && $post eq ' ')
                     || ($pre eq '=' && $post ne '=')
                     || ($pre ne '=' && $post eq '='))) {
            "$pre\\*(--$post";
        } else {
            $pre . ('\-' x length $dash) . $post;
        }
    }egxs;

    # Fix up double quotes.
    s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx;

    # Make C++ into \*(C+, which is a squinched version.
    s{ \b C\+\+ } {\\*\(C+}gx;

    # All done.
    $_;
}
900
901
902############################################################################
903# Output formatting
904############################################################################
905
# Make vertical whitespace: .Sp while inside an indented list, .PP at the
# top level.
sub makespace {
    my $self = shift;
    my $request = ($$self{INDENT} > 0) ? ".Sp\n" : ".PP\n";
    $self->output ($request);
}
911
# Output any pending index entries, and optionally an index entry given as
# an argument (a section type such as Header or Item, plus the entry text).
# Support multiple index entries in X<> separated by slashes, and strip
# special escapes from index entries.  The queue of pending X<> entries is
# emptied once they have been written.
sub outindex {
    my ($self, $section, $index) = @_;
    my @entries = map { split m%\s*/\s*% } @{ $$self{INDEX} };
    return unless ($section || @entries);
    $$self{INDEX} = [];

    # Build everything in one string.  The pending X<> entries have to be
    # appended to this same variable (not a fresh lexical inside the block),
    # or they're silently thrown away.
    my $output = '';
    if (@entries) {
        s/\"/\"\"/g for @entries;       # Double every embedded quote.
        $output .= '.IX Xref "' . join (' ', @entries) . '"' . "\n";
    }
    if ($section) {
        $index =~ s/\"/\"\"/g;
        $index =~ s/\\-/-/g;
        $index =~ s/\\(?:s-?\d|.\(..|.)//g;
        $output .= ".IX $section " . '"' . $index . '"' . "\n";
    }
    $self->output ($output);
}
934
# Output text to the output device, i.e. print it to the parser's current
# output handle.
sub output {
    my ($self, $text) = @_;
    my $handle = $self->output_handle;
    print $handle $text;
}
937
938__END__
939
940.\" These are some extra bits of roff that I don't want to lose track of
941.\" but that have been removed from the preamble to make it a bit shorter
942.\" since they're not currently being used. They're accents and special
943.\" characters we don't currently have escapes for.
944.if n \{\
945. ds ? ?
946. ds ! !
947. ds q
948.\}
949.if t \{\
950. ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
951. ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
952. ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
953.\}
954.ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
955.ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
956.ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
957.ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
958.ds oe o\h'-(\w'o'u*4/10)'e
959.ds Oe O\h'-(\w'O'u*4/10)'E
960.if \n(.H>23 .if \n(.V>19 \
961\{\
962. ds v \h'-1'\o'\(aa\(ga'
963. ds _ \h'-1'^
964. ds . \h'-1'.
965. ds 3 3
966. ds oe oe
967. ds Oe OE
968.\}
969
970############################################################################
971# Documentation
972############################################################################
973
974=head1 NAME
975
976Pod::Man - Convert POD data to formatted *roff input
977
978=head1 SYNOPSIS
979
980 use Pod::Man;
981 my $parser = Pod::Man->new (release => $VERSION, section => 8);
982
983 # Read POD from STDIN and write to STDOUT.
984 $parser->parse_from_filehandle;
985
986 # Read POD from file.pod and write to file.1.
987 $parser->parse_from_file ('file.pod', 'file.1');
988
989=head1 DESCRIPTION
990
991Pod::Man is a module to convert documentation in the POD format (the
992preferred language for documenting Perl) into *roff input using the man
993macro set. The resulting *roff code is suitable for display on a terminal
994using nroff(1), normally via man(1), or printing using troff(1). It is
9e107c59 995conventionally invoked using the driver script B<pod2man>, but it can also
9741dab0 996be used directly.
997
998As a derived class from Pod::Parser, Pod::Man supports the same methods and
999interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
1000new parser with C<Pod::Man-E<gt>new()> and then calls either
1001parse_from_filehandle() or parse_from_file().
1002
1003new() can take options, in the form of key/value pairs that control the
1004behavior of the parser. See below for details.
1005
If no options are given, Pod::Man uses the name of the input file with any
trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title. It
defaults to section 1 unless the file ended in C<.pm>, in which case it
defaults to section 3; to a centered title of "User Contributed Perl
Documentation"; to a centered footer of the Perl version it is run with; and
to a left-hand footer of the modification date of its input (or the current
date if given STDIN for input).
1013
1014Pod::Man assumes that your *roff formatters have a fixed-width font named
1015CW. If yours is called something else (like CR), use the C<fixed> option to
1016specify it. This generally only matters for troff output for printing.
1017Similarly, you can set the fonts used for bold, italic, and bold italic
1018fixed-width output.
1019
1020Besides the obvious pod conversions, Pod::Man also takes care of formatting
1021func(), func(n), and simple variable references like $foo or @bar so you
1022don't have to use code escapes for them; complex expressions like
1023C<$fred{'stuff'}> will still need to be escaped, though. It also translates
1024dashes that aren't used as hyphens into en dashes, makes long dashes--like
1025this--into proper em dashes, fixes "paired quotes," makes C++ and PI look
1026right, puts a little space between double underbars, makes ALLCAPS a teeny
1027bit smaller in troff(1), and escapes stuff that *roff treats as special so
1028that you don't have to.
1029
1030The recognized options to new() are as follows. All options take a single
1031argument.
1032
1033=over 4
1034
1035=item center
1036
1037Sets the centered page header to use instead of "User Contributed Perl
1038Documentation".
1039
1040=item date
1041
1042Sets the left-hand footer. By default, the modification date of the input
1043file will be used, or the current date if stat() can't find that file (the
1044case if the input is from STDIN), and the date will be formatted as
1045YYYY-MM-DD.
1046
1047=item fixed
1048
The fixed-width font to use for verbatim text and code. Defaults to CW.
1050Some systems may want CR instead. Only matters for troff(1) output.
1051
1052=item fixedbold
1053
1054Bold version of the fixed-width font. Defaults to CB. Only matters for
1055troff(1) output.
1056
1057=item fixeditalic
1058
1059Italic version of the fixed-width font (actually, something of a misnomer,
1060since most fixed-width fonts only have an oblique version, not an italic
1061version). Defaults to CI. Only matters for troff(1) output.
1062
1063=item fixedbolditalic
1064
1065Bold italic (probably actually oblique) version of the fixed-width font.
1066Pod::Man doesn't assume you have this, and defaults to CB. Some systems
1067(such as Solaris) have this font available as CX. Only matters for troff(1)
1068output.
1069
1070=item release
1071
1072Set the centered footer. By default, this is the version of Perl you run
1073Pod::Man under. Note that some system an macro sets assume that the
1074centered footer will be a modification date and will prepend something like
1075"Last modified: "; if this is the case, you may want to set C<release> to
1076the last modified date and C<date> to the version number.
1077
1078=item section
1079
1080Set the section for the C<.TH> macro. The standard section numbering
1081convention is to use 1 for user commands, 2 for system calls, 3 for
1082functions, 4 for devices, 5 for file formats, 6 for games, 7 for
1083miscellaneous information, and 8 for administrator commands. There is a lot
1084of variation here, however; some systems (like Solaris) use 4 for file
1085formats, 5 for miscellaneous information, and 7 for devices. Still others
1086use 1m instead of 8, or some mix of both. About the only section numbers
1087that are reliably consistent are 1, 2, and 3.
1088
1089By default, section 1 will be used unless the file ends in .pm in which case
1090section 3 will be selected.
1091
1092=back
1093
1094The standard Pod::Parser method parse_from_filehandle() takes up to two
1095arguments, the first being the file handle to read POD from and the second
1096being the file handle to write the formatted output to. The first defaults
1097to STDIN if not given, and the second defaults to STDOUT. The method
1098parse_from_file() is almost identical, except that its two arguments are the
1099input and output disk files instead. See L<Pod::Parser> for the specific
1100details.
1101
1102=head1 DIAGNOSTICS
1103
1104=over 4
1105
1106=item roff font should be 1 or 2 chars, not `%s'
1107
1108(F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that
1109wasn't either one or two characters. Pod::Man doesn't support *roff fonts
1110longer than two characters, although some *roff extensions do (the canonical
1111versions of nroff(1) and troff(1) don't either).
1112
1113=item Invalid link %s
1114
1115(W) The POD source contained a C<LE<lt>E<gt>> sequence that Pod::Man was
1116unable to parse. You should never see this error message; it probably
1117indicates a bug in Pod::Man.
1118
1119=item Unknown escape EE<lt>%sE<gt>
1120
1121(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Man didn't
1122know about. C<EE<lt>%sE<gt>> was printed verbatim in the output.
1123
1124=item Unknown sequence %s
1125
1126(W) The POD source contained a non-standard interior sequence (something of
1127the form C<XE<lt>E<gt>>) that Pod::Man didn't know about. It was ignored.
1128
1129=item Unmatched =back
1130
1131(W) Pod::Man encountered a C<=back> command that didn't correspond to an
1132C<=over> command.
1133
1134=back
1135
1136=head1 BUGS
1137
1138The lint-like features and strict POD format checking done by B<pod2man> are
1139not yet implemented and should be, along with the corresponding C<lax>
1140option.
1141
1142The NAME section should be recognized specially and index entries emitted
1143for everything in that section. This would have to be deferred until the
next section, since extraneous things in NAME tend to confuse various man
1145page processors.
1146
1147The handling of hyphens, en dashes, and em dashes is somewhat fragile, and
1148one may get the wrong one under some circumstances. This should only matter
1149for troff(1) output.
1150
1151When and whether to use small caps is somewhat tricky, and Pod::Man doesn't
1152necessarily get it right.
1153
1154Pod::Man doesn't handle font names longer than two characters. Neither do
1155most troff(1) implementations, but GNU troff does as an extension. It would
1156be nice to support as an option for those who want to use it.
1157
1158The preamble added to each output file is rather verbose, and most of it is
1159only necessary in the presence of EE<lt>E<gt> escapes for non-ASCII
1160characters. It would ideally be nice if all of those definitions were only
1161output if needed, perhaps on the fly as the characters are used.
1162
1163Some of the automagic applied to file names assumes Unix directory
1164separators.
1165
1166Pod::Man is excessively slow.
1167
1168=head1 NOTES
1169
1170The intention is for this module and its driver script to eventually replace
1171B<pod2man> in Perl core.
1172
1173=head1 SEE ALSO
1174
9e107c59 1175L<Pod::Parser|Pod::Parser>, perlpod(1), pod2man(1), nroff(1), troff(1),
9741dab0 1176man(1), man(7)
1177
1178Ossanna, Joseph F., and Brian W. Kernighan. "Troff User's Manual,"
1179Computing Science Technical Report No. 54, AT&T Bell Laboratories. This is
1180the best documentation of standard nroff(1) and troff(1). At the time of
1181this writing, it's available at http://www.cs.bell-labs.com/cm/cs/cstr.html.
1182
1183The man page documenting the man macro set may be man(5) instead of man(7)
9e107c59 1184on your system. Also, please see pod2man(1) for extensive documentation on
9741dab0 1185writing manual pages if you've not done it before and aren't familiar with
1186the conventions.
1187
1188=head1 AUTHOR
1189
1190Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
1191original B<pod2man> by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>.
1192
1193=cut