1 # Pod::Man -- Convert POD data to formatted *roff input.
2 # $Id: Man.pm,v 1.32 2002/01/02 09:02:24 eagle Exp $
4 # Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>
6 # This program is free software; you may redistribute it and/or modify it
7 # under the same terms as Perl itself.
9 # This module translates POD documentation into *roff markup using the man
10 # macro set, and is intended for converting POD documents written as Unix
11 # manual pages to manual pages that can be read by the man(1) command. It is
12 # a replacement for the pod2man command distributed with versions of Perl
15 # Perl core hackers, please note that this module is also separately
16 # maintained outside of the Perl core as part of the podlators. Please send
17 # me any patches at the address above in addition to sending them to the
18 # standard Perl mailing lists.
20 ##############################################################################
21 # Modules and declarations
22 ##############################################################################
28 use Carp qw(carp croak);
29 use Pod::ParseLink qw(parselink);
33 use subs qw(makespace);
34 use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION);
36 @ISA = qw(Pod::Parser);
38 # Don't use the CVS revision as the version, since this module is also in Perl
39 # core and too many things could munge CVS magic revision strings. This
40 # number should ideally be the same as the CVS revision in podlators, however.
44 ##############################################################################
45 # Preamble and *roff output tables
46 ##############################################################################
48 # The following is the static preamble which starts all *roff output we
49 # generate. It's completely static except for the font to use as a
50 # fixed-width font, which is designated by @CFONT@, and the left and right
51 # quotes to use for C<> text, designated by @LQUOTE@ and @RQUOTE@. $PREAMBLE
52 # should therefore be run through s/\@CFONT\@/<font>/g before output.
53 $PREAMBLE = <<'----END OF PREAMBLE----';
54 .de Sh \" Subsection heading
62 .de Sp \" Vertical space (when we can't use .PP)
66 .de Vb \" Begin verbatim text
71 .de Ve \" End verbatim text
75 .\" Set up some character translations and predefined strings. \*(-- will
76 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
77 .\" double quote, and \*(R" will give a right double quote. | will give a
78 .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
79 .\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
80 .\" expand to `' in nroff, nothing in troff, for use with C<>.
82 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
86 . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
87 . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
100 .\" If the F register is turned on, we'll generate index entries on stderr for
101 .\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
102 .\" entries marked with X<> in POD. Of course, you'll have to process the
103 .\" output yourself in some meaningful fashion.
106 . tm Index:\\$1\t\\n%\t"\\$2"
112 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
113 .\" way too many mistakes in technical documents.
117 .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
118 .\" Fear. Run. Save yourself. No user-serviceable parts.
119 . \" fudge factors for nroff and troff
128 . ds #H ((1u-(\\\\n(.fu%2u))*.13m)
134 . \" simple accents for nroff and troff
144 . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
145 . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
146 . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
147 . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
148 . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
149 . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
151 . \" troff and (daisy-wheel) nroff accents
152 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
153 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
154 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
155 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
156 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
157 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
158 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
159 .ds ae a\h'-(\w'a'u*4/10)'e
160 .ds Ae A\h'-(\w'A'u*4/10)'E
161 . \" corrections for vroff
162 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
163 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
164 . \" for low resolution devices (crt and lpr)
165 .if \n(.H>23 .if \n(.V>19 \
178 ----END OF PREAMBLE----
181 # This table is taken nearly verbatim from Tom Christiansen's pod2man. It
182 # assumes that the standard preamble has already been printed, since that's
183 # what defines all of the accent marks. Note that some of these are quoted
184 # with double quotes since they contain embedded single quotes, so use \\
185 # uniformly for backslash for readability.
187 'amp' => '&', # ampersand
188 'apos' => "'", # apostrophe
189 'lt' => '<', # left chevron, less-than
190 'gt' => '>', # right chevron, greater-than
191 'quot' => '"', # double quote
192 'sol' => '/', # solidus (forward slash)
193 'verbar' => '|', # vertical bar
195 'Aacute' => "A\\*'", # capital A, acute accent
196 'aacute' => "a\\*'", # small a, acute accent
197 'Acirc' => 'A\\*^', # capital A, circumflex accent
198 'acirc' => 'a\\*^', # small a, circumflex accent
199 'AElig' => '\\*(Ae', # capital AE diphthong (ligature); preamble string is "Ae"
200 'aelig' => '\\*(ae', # small ae diphthong (ligature)
201 'Agrave' => "A\\*`", # capital A, grave accent
202 'agrave' => "a\\*`", # small a, grave accent
203 'Aring' => 'A\\*o', # capital A, ring
204 'aring' => 'a\\*o', # small a, ring
205 'Atilde' => 'A\\*~', # capital A, tilde
206 'atilde' => 'a\\*~', # small a, tilde
207 'Auml' => 'A\\*:', # capital A, dieresis or umlaut mark
208 'auml' => 'a\\*:', # small a, dieresis or umlaut mark
209 'Ccedil' => 'C\\*,', # capital C, cedilla
210 'ccedil' => 'c\\*,', # small c, cedilla
211 'Eacute' => "E\\*'", # capital E, acute accent
212 'eacute' => "e\\*'", # small e, acute accent
213 'Ecirc' => 'E\\*^', # capital E, circumflex accent
214 'ecirc' => 'e\\*^', # small e, circumflex accent
215 'Egrave' => 'E\\*`', # capital E, grave accent
216 'egrave' => 'e\\*`', # small e, grave accent
217 'ETH' => '\\*(D-', # capital Eth, Icelandic
218 'eth' => '\\*(d-', # small eth, Icelandic
219 'Euml' => 'E\\*:', # capital E, dieresis or umlaut mark
220 'euml' => 'e\\*:', # small e, dieresis or umlaut mark
221 'Iacute' => "I\\*'", # capital I, acute accent
222 'iacute' => "i\\*'", # small i, acute accent
223 'Icirc' => 'I\\*^', # capital I, circumflex accent
224 'icirc' => 'i\\*^', # small i, circumflex accent
225 'Igrave' => 'I\\*`', # capital I, grave accent
226 'igrave' => 'i\\*`', # small i, grave accent
227 'Iuml' => 'I\\*:', # capital I, dieresis or umlaut mark
228 'iuml' => 'i\\*:', # small i, dieresis or umlaut mark
229 'Ntilde' => 'N\\*~', # capital N, tilde
230 'ntilde' => 'n\\*~', # small n, tilde
231 'Oacute' => "O\\*'", # capital O, acute accent
232 'oacute' => "o\\*'", # small o, acute accent
233 'Ocirc' => 'O\\*^', # capital O, circumflex accent
234 'ocirc' => 'o\\*^', # small o, circumflex accent
235 'Ograve' => 'O\\*`', # capital O, grave accent
236 'ograve' => 'o\\*`', # small o, grave accent
237 'Oslash' => 'O\\*/', # capital O, slash
238 'oslash' => 'o\\*/', # small o, slash
239 'Otilde' => 'O\\*~', # capital O, tilde
240 'otilde' => 'o\\*~', # small o, tilde
241 'Ouml' => 'O\\*:', # capital O, dieresis or umlaut mark
242 'ouml' => 'o\\*:', # small o, dieresis or umlaut mark
243 'szlig' => '\\*8', # small sharp s, German (sz ligature)
244 'THORN' => '\\*(Th', # capital THORN, Icelandic
245 'thorn' => '\\*(th', # small thorn, Icelandic
246 'Uacute' => "U\\*'", # capital U, acute accent
247 'uacute' => "u\\*'", # small u, acute accent
248 'Ucirc' => 'U\\*^', # capital U, circumflex accent
249 'ucirc' => 'u\\*^', # small u, circumflex accent
250 'Ugrave' => 'U\\*`', # capital U, grave accent
251 'ugrave' => 'u\\*`', # small u, grave accent
252 'Uuml' => 'U\\*:', # capital U, dieresis or umlaut mark
253 'uuml' => 'u\\*:', # small u, dieresis or umlaut mark
254 'Yacute' => "Y\\*'", # capital Y, acute accent
255 'yacute' => "y\\*'", # small y, acute accent
256 'yuml' => 'y\\*:', # small y, dieresis or umlaut mark
258 'nbsp' => '\\ ', # non-breaking space
259 'shy' => '', # soft (discretionary) hyphen
263 ##############################################################################
264 # Static helper functions
265 ##############################################################################
267 # Protect leading quotes and periods against interpretation as commands. Also
268 # protect anything starting with a backslash, since it could expand or hide
269 # something that *roff would interpret as a command. This is overkill, but
270 # it's much simpler than trying to parse *roff here.
273 s/^([.\'\\])/\\&$1/mg;
277 # Turn a font name into its *roff escape: \fX, or \f(XX for longer names.
278 sub toescape { my ($font) = @_; (length ($font) > 1 ? '\f(' : '\f') . $font }
281 ##############################################################################
283 ##############################################################################
285 # Initialize the object. Here, we also process any additional options passed
286 # to the constructor or set up defaults if none were given. center is the
287 # centered title, release is the version number, and date is the date for the
288 # documentation. Note that we can't know what file name we're processing due
289 # to the architecture of Pod::Parser, so that *has* to either be passed to the
290 # constructor or set separately with Pod::Man::name().
294 # Figure out the fixed-width font. If user-supplied, make sure that they
295 # are the right length.
296 for (qw/fixed fixedbold fixeditalic fixedbolditalic/) {
297 if (defined $$self{$_}) {
298 if (length ($$self{$_}) < 1 || length ($$self{$_}) > 2) {
299 croak qq(roff font should be 1 or 2 chars,)
300 . qq( not "$$self{$_}");
307 # Set the default fonts. We can't be sure what fixed bold-italic is going
308 # to be called, so default to just bold.
309 $$self{fixed} ||= 'CW';
310 $$self{fixedbold} ||= 'CB';
311 $$self{fixeditalic} ||= 'CI';
312 $$self{fixedbolditalic} ||= 'CB';
314 # Set up a table of font escapes. First number is fixed-width, second is
315 # bold, third is italic.
316 $$self{FONTS} = { '000' => '\fR', '001' => '\fI',
317 '010' => '\fB', '011' => '\f(BI',
318 '100' => toescape ($$self{fixed}),
319 '101' => toescape ($$self{fixeditalic}),
320 '110' => toescape ($$self{fixedbold}),
321 '111' => toescape ($$self{fixedbolditalic})};
323 # Extra stuff for page titles.
324 $$self{center} = 'User Contributed Perl Documentation'
325 unless defined $$self{center};
326 $$self{indent} = 4 unless defined $$self{indent};
328 # We used to try first to get the version number from a local binary, but
329 # we shouldn't need that any more. Get the version from the running Perl.
330 # Work a little magic to handle subversions correctly under both the
331 # pre-5.6 and the post-5.6 version numbering schemes.
332 if (!defined $$self{release}) {
333 my @version = ($] =~ /^(\d+)\.(\d{3})(\d{0,3})$/);
335 $version[2] *= 10 ** (3 - length $version[2]);
336 for (@version) { $_ += 0 }
337 $$self{release} = 'perl v' . join ('.', @version);
340 # Double quotes in things that will be quoted.
341 for (qw/center date release/) {
342 $$self{$_} =~ s/\"/\"\"/g if $$self{$_};
345 # Figure out what quotes we'll be using for C<> text.
346 $$self{quotes} ||= '"';
347 if ($$self{quotes} eq 'none') {
348 $$self{LQUOTE} = $$self{RQUOTE} = '';
349 } elsif (length ($$self{quotes}) == 1) {
350 $$self{LQUOTE} = $$self{RQUOTE} = $$self{quotes};
351 } elsif ($$self{quotes} =~ /^(.)(.)$/
352 || $$self{quotes} =~ /^(..)(..)$/) {
356 croak qq(Invalid quote specification "$$self{quotes}");
359 # Double the first quote; note that this should not be s///g as two double
360 # quotes is represented in *roff as three double quotes, not four. Weird,
362 $$self{LQUOTE} =~ s/\"/\"\"/;
363 $$self{RQUOTE} =~ s/\"/\"\"/;
365 $self->SUPER::initialize;
368 # For each document we process, output the preamble first.
372 # Try to figure out the name and section from the file name.
373 my $section = $$self{section} || 1;
374 my $name = $$self{name};
375 if (!defined $name) {
376 $name = $self->input_file;
377 $section = 3 if (!$$self{section} && $name =~ /\.pm\z/i);
378 $name =~ s/\.p(od|[lm])\z//i;
379 if ($section !~ /^3/) {
380 require File::Basename;
381 $name = uc File::Basename::basename ($name);
383 # Assume that we're dealing with a module. We want to figure out
384 # the full module name from the path to the file, but we don't
385 # want to include too much of the path into the module name. Lose
386 # everything up to the first of:
388 # */lib/*perl*/ standard or site_perl module
389 # */*perl*/lib/ from -Dprefix=/opt/perl
390 # */*perl*/ random module hierarchy
392 # which works. Also strip off a leading site or site_perl
393 # component, any OS-specific component, and any version number
394 # component, and strip off an initial component of "lib" or
395 # "blib/lib" since that's what ExtUtils::MakeMaker creates.
396 # splitdir requires at least File::Spec 0.8.
398 my ($volume, $dirs, $file) = File::Spec->splitpath ($name);
399 my @dirs = File::Spec->splitdir ($dirs);
402 for ($i = 0; $i < scalar @dirs; $i++) {
403 if ($dirs[$i] eq 'lib' && $dirs[$i + 1] =~ /perl/) {
406 } elsif ($dirs[$i] =~ /perl/) {
408 $cut++ if $dirs[$i + 1] eq 'lib';
413 splice (@dirs, 0, $cut);
414 shift @dirs if ($dirs[0] =~ /^site(_perl)?$/);
415 shift @dirs if ($dirs[0] =~ /^[\d.]+$/);
416 shift @dirs if ($dirs[0] =~ /^(.*-$^O|$^O-.*|$^O)$/);
418 shift @dirs if $dirs[0] eq 'lib';
419 splice (@dirs, 0, 2) if ($dirs[0] eq 'blib' && $dirs[1] eq 'lib');
421 # Remove empty directories when building the module name; they
422 # occur too easily on Unix by doubling slashes.
423 $name = join ('::', (grep { $_ ? $_ : () } @dirs), $file);
427 # If $name contains spaces, quote it; this mostly comes up in the case of
429 $name = '"' . $name . '"' if ($name =~ /\s/);
431 # Modification date header. Try to use the modification time of our
433 if (!defined $$self{date}) {
434 my $time = (stat $self->input_file)[9] || time;
435 my ($day, $month, $year) = (localtime $time)[3,4,5];
438 $$self{date} = sprintf ('%4d-%02d-%02d', $year, $month, $day);
441 # Now, print out the preamble and the title. The meaning of the arguments
442 # to .TH unfortunately vary by system; some systems consider the fourth
443 # argument to be a "source" and others use it as a version number.
444 # Generally it's just presented as the left-side footer, though, so it
445 # doesn't matter too much if a particular system gives it another
448 # The order of date and release used to be reversed in older versions of
449 # this module, but this order is correct for both Solaris and Linux.
450 local $_ = $PREAMBLE;
451 s/\@CFONT\@/$$self{fixed}/;
452 s/\@LQUOTE\@/$$self{LQUOTE}/;
453 s/\@RQUOTE\@/$$self{RQUOTE}/;
455 my $pversion = $Pod::Parser::VERSION;
456 print { $self->output_handle } <<"----END OF HEADER----";
457 .\\" Automatically generated by Pod::Man v$VERSION, Pod::Parser v$pversion
459 .\\" Standard preamble:
460 .\\" ========================================================================
462 .\\" ========================================================================
464 .IX Title "$name $section"
465 .TH $name $section "$$self{date}" "$$self{release}" "$$self{center}"
466 ----END OF HEADER----
468 # Initialize a few per-file variables.
469 $$self{INDENT} = 0; # Current indentation level.
470 $$self{INDENTS} = []; # Stack of indentations.
471 $$self{INDEX} = []; # Index keys waiting to be printed.
472 $$self{IN_NAME} = 0; # Whether processing the NAME section.
473 $$self{ITEMS} = 0; # The number of consecutive =items.
474 $$self{SHIFTWAIT} = 0; # Whether there is a shift waiting.
475 $$self{SHIFTS} = []; # Stack of .RS shifts.
479 ##############################################################################
481 ##############################################################################
483 # Called for each command paragraph. Gets the command, the associated
484 # paragraph, the line number, and a Pod::Paragraph object. Just dispatches
485 # the command to a method named the same as the command. =cut is handled
486 # internally by Pod::Parser.
490 return if $command eq 'pod';
491 return if ($$self{EXCLUDE} && $command ne 'end');
492 if ($self->can ('cmd_' . $command)) {
493 $command = 'cmd_' . $command;
494 $self->$command (@_);
496 my ($text, $line, $paragraph) = @_;
498 ($file, $line) = $paragraph->file_line;
500 $text = " $text" if ($text =~ /^\S/);
501 warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
506 # Called for a verbatim paragraph. Gets the paragraph, the line number, and a
507 # Pod::Paragraph object. Rofficate backslashes, untabify, put a zero-width
508 # character at the beginning of each line to protect against commands, and
512 return if $$self{EXCLUDE};
516 my $lines = tr/\n/\n/;
517 1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
519 s/^(\s*\S)/'\&' . $1/gme;
521 $self->output (".Vb $lines\n$_.Ve\n");
522 $$self{NEEDSPACE} = 1;
525 # Called for a regular text block. Gets the paragraph, the line number, and a
526 # Pod::Paragraph object. Perform interpolation and output the results.
529 return if $$self{EXCLUDE};
530 $self->output ($_[0]), return if $$self{VERBATIM};
532 # Parse the tree. collapse knows about references to scalars as well as
533 # scalars and does the right thing with them. Tidy up any trailing
536 $text = $self->parse ($text, @_);
537 $text =~ s/\n\s*$/\n/;
539 # Output the paragraph. We also have to handle =over without =item. If
540 # there's an =over without =item, SHIFTWAIT will be set, and we need to
541 # handle creation of the indent here. Record the shift in SHIFTS so that it
542 # will be cleaned up on =back.
544 if ($$self{SHIFTWAIT}) {
545 $self->output (".RS $$self{INDENT}\n");
546 push (@{ $$self{SHIFTS} }, $$self{INDENT});
547 $$self{SHIFTWAIT} = 0;
549 $self->output (protect $self->textmapfonts ($text));
551 $$self{NEEDSPACE} = 1;
554 # Called for a formatting code. Takes a Pod::InteriorSequence object and
555 # normally returns a reference to a one-element array holding the final
556 # formatted text. The text is wrapped in an array reference so that other
557 # formatting codes above us know that it has already been processed.
559 my ($self, $seq) = @_;
560 my $command = $seq->cmd_name;
562 # We have to defer processing of the inside of an L<> formatting code. If
563 # this code is nested inside an L<> code, return the literal raw text of
565 my $parent = $seq->nested;
566 while (defined $parent) {
567 return $seq->raw_text if ($parent->cmd_name eq 'L');
568 $parent = $parent->nested;
571 # Zero-width characters.
572 return [ '\&' ] if ($command eq 'Z');
574 # C<>, L<>, X<>, and E<> don't apply guesswork to their contents. C<>
575 # needs some additional special handling.
576 my $literal = ($command =~ /^[CELX]$/);
577 local $_ = $self->collapse ($seq->parse_tree, $literal, $command eq 'C');
579 # Handle E<> escapes. Numeric escapes that match one of the supported ISO
580 # 8859-1 characters don't work at present.
581 if ($command eq 'E') {
584 } elsif (exists $ESCAPES{$_}) {
585 return [ $ESCAPES{$_} ];
587 my ($file, $line) = $seq->file_line;
588 warn "$file:$line: Unknown escape E<$_>\n";
593 # For all the other codes, empty content produces no output.
594 return '' if $_ eq '';
596 # Handle simple formatting codes.
597 if ($command eq 'B') {
598 return [ '\f(BS' . $_ . '\f(BE' ];
599 } elsif ($command eq 'F' || $command eq 'I') {
600 return [ '\f(IS' . $_ . '\f(IE' ];
601 } elsif ($command eq 'C') {
602 return [ $self->quote_literal ($_) ];
606 if ($command eq 'L') {
607 my ($text, $type) = (parselink ($_))[1,4];
608 return '' unless $text;
609 my ($file, $line) = $seq->file_line;
610 $text = $self->parse ($text, $line);
611 $text = '<' . $text . '>' if $type eq 'url';
615 # Whitespace protection replaces whitespace with "\ ".
616 if ($command eq 'S') {
621 # Add an index entry to the list of ones waiting to be output.
622 if ($command eq 'X') {
623 push (@{ $$self{INDEX} }, $_);
627 # Anything else is unknown.
628 my ($file, $line) = $seq->file_line;
629 warn "$file:$line: Unknown formatting code $command<$_>\n";
633 ##############################################################################
635 ##############################################################################
637 # All command paragraphs take the paragraph and the line number.
639 # First level heading. We can't output .IX in the NAME section due to a bug
640 # in some versions of catman, so don't output a .IX for that section. .SH
641 # already uses small caps, so remove \s1 and \s-1. Maintain IN_NAME as
642 # appropriate, but don't leave it set while calling parse() so as to not
643 # override guesswork on section headings after NAME.
647 local $_ = $self->parse (@_);
651 if ($$self{ITEMS} > 1) {
653 $self->output (".PD\n");
655 $self->output ($self->switchquotes ('.SH', $self->mapfonts ($_)));
656 $self->outindex (($_ eq 'NAME') ? () : ('Header', $_));
657 $$self{NEEDSPACE} = 0;
658 $$self{IN_NAME} = ($_ eq 'NAME');
661 # Second level heading.
664 local $_ = $self->parse (@_);
667 if ($$self{ITEMS} > 1) {
669 $self->output (".PD\n");
671 $self->output ($self->switchquotes ('.Sh', $self->mapfonts ($_)));
672 $self->outindex ('Subsection', $_);
673 $$self{NEEDSPACE} = 0;
676 # Third level heading.
679 local $_ = $self->parse (@_);
682 if ($$self{ITEMS} > 1) {
684 $self->output (".PD\n");
687 $self->output ($self->textmapfonts ('\f(IS' . $_ . '\f(IE') . "\n");
688 $self->outindex ('Subsection', $_);
689 $$self{NEEDSPACE} = 1;
692 # Fourth level heading.
695 local $_ = $self->parse (@_);
698 if ($$self{ITEMS} > 1) {
700 $self->output (".PD\n");
703 $self->output ($self->textmapfonts ($_) . "\n");
704 $self->outindex ('Subsection', $_);
705 $$self{NEEDSPACE} = 1;
708 # Start a list. For indents after the first, wrap the outside indent in .RS
709 # so that hanging paragraph tags will be correct.
713 unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
714 if (@{ $$self{SHIFTS} } < @{ $$self{INDENTS} }) {
715 $self->output (".RS $$self{INDENT}\n");
716 push (@{ $$self{SHIFTS} }, $$self{INDENT});
718 push (@{ $$self{INDENTS} }, $$self{INDENT});
719 $$self{INDENT} = ($_ + 0);
720 $$self{SHIFTWAIT} = 1;
723 # End a list. If we've closed an embedded indent, we've mangled the hanging
724 # paragraph indent, so temporarily replace it with .RS and set WEIRDINDENT.
725 # We'll close that .RS at the next =back or =item.
728 $$self{INDENT} = pop @{ $$self{INDENTS} };
729 unless (defined $$self{INDENT}) {
730 my ($file, $line, $paragraph) = @_;
731 ($file, $line) = $paragraph->file_line;
732 warn "$file:$line: Unmatched =back\n";
735 if (@{ $$self{SHIFTS} } > @{ $$self{INDENTS} }) {
736 $self->output (".RE\n");
737 pop @{ $$self{SHIFTS} };
739 if (@{ $$self{INDENTS} } > 0) {
740 $self->output (".RE\n");
741 $self->output (".RS $$self{INDENT}\n");
743 $$self{NEEDSPACE} = 1;
744 $$self{SHIFTWAIT} = 0;
747 # An individual list item. Emit an index entry for anything that's
748 # interesting, but don't emit index entries for things like bullets and
749 # numbers. rofficate bullets too while we're at it (so for nice output, use *
750 # for your lists rather than o or . or - or some other thing). Newlines in an
751 # item title are turned into spaces since *roff can't handle them embedded.
754 local $_ = $self->parse (@_);
758 if (/\w/ && !/^\w[.\)]\s*$/) {
760 $index =~ s/^\s*[-*+o.]?(?:\s+|\Z)//;
763 s/^\*(\s|\Z)/\\\(bu$1/;
764 if (@{ $$self{SHIFTS} } == @{ $$self{INDENTS} }) {
765 $self->output (".RE\n");
766 pop @{ $$self{SHIFTS} };
768 $_ = $self->textmapfonts ($_);
769 $self->output (".PD 0\n") if ($$self{ITEMS} == 1);
770 $self->output ($self->switchquotes ('.IP', $_, $$self{INDENT}));
771 $self->outindex ($index ? ('Item', $index) : ());
772 $$self{NEEDSPACE} = 0;
774 $$self{SHIFTWAIT} = 0;
777 # Begin a block for a particular translator. Setting VERBATIM triggers
778 # special handling in textblock().
782 my ($kind) = /^(\S+)/ or return;
783 if ($kind eq 'man' || $kind eq 'roff') {
784 $$self{VERBATIM} = 1;
790 # End a block for a particular translator. We assume that all =begin/=end
791 # pairs are properly closed.
795 $$self{VERBATIM} = 0;
798 # One paragraph for a particular translator. Ignore it unless it's intended
799 # for man or roff, in which case we output it verbatim.
803 return unless s/^(?:man|roff)\b[ \t]*\n?//;
808 ##############################################################################
809 # Escaping and fontification
810 ##############################################################################
812 # At this point, we'll have embedded font codes of the form \f(<font>[SE]
813 # where <font> is one of B, I, or F. Turn those into the right font start or
814 # end codes. The old pod2man didn't get B<someI<thing> else> right; after I<>
815 # it switched back to normal text rather than bold. We take care of this by
816 # using variables as a combined pointer to our current font sequence, and set
817 # each to the number of current nestings of start tags for that font. Use
818 # them as a vector to look up what font sequence to use.
820 # \fP changes to the previous font, but only one previous font is kept. We
821 # don't know what the outside level font is; normally it's R, but if we're
822 # inside a heading it could be something else. So arrange things so that the
823 # outside font is always the "previous" font and end with \fP instead of \fR.
824 # Idea from Zack Weinberg.
829 my ($fixed, $bold, $italic) = (0, 0, 0);
830 my %magic = (F => \$fixed, B => \$bold, I => \$italic);
835 if ($last ne '\fR') { $sequence = '\fP' }
836 ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1;
837 $f = $$self{FONTS}{($fixed && 1) . ($bold && 1) . ($italic && 1)};
841 if ($f ne '\fR') { $sequence .= $f }
849 # Unfortunately, there is a bug in Solaris 2.6 nroff (not present in GNU
850 # groff) where the sequence \fB\fP\f(CW\fP leaves the font set to B rather
851 # than R, presumably because \f(CW doesn't actually do a font change. To work
852 # around this, use a separate textmapfonts for text blocks where the default
853 # font is always R and only use the smart mapfonts for headings.
858 my ($fixed, $bold, $italic) = (0, 0, 0);
859 my %magic = (F => \$fixed, B => \$bold, I => \$italic);
861 ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1;
862 $$self{FONTS}{($fixed && 1) . ($bold && 1) . ($italic && 1)};
868 ##############################################################################
869 # *roff-specific parsing and magic
870 ##############################################################################
872 # Called instead of parse_text, calls parse_text with the right flags.
875 $self->parse_text ({ -expand_seq => 'sequence',
876 -expand_ptree => 'collapse' }, @_);
879 # Takes a parse tree, a flag saying whether or not to treat it as literal text
880 # (not call guesswork on it), and a flag saying whether or not to clean some
881 # things up for *roff, and returns the concatenation of all of the text
882 # strings in that parse tree. If the literal flag isn't true, guesswork()
883 # will be called on all plain scalars in the parse tree. Otherwise, if
884 # collapse is being called on a C<> code, $cleanup should be set to true and
885 # some additional cleanup will be done. Assumes that everything in the parse
886 # tree is either a scalar or a reference to a scalar.
888 my ($self, $ptree, $literal, $cleanup) = @_;
890 # If we're processing the NAME section, don't do normal guesswork. This
891 # is because NAME lines are often extracted by utilities like catman that
892 # require plain text and don't understand *roff markup. We still need to
893 # escape backslashes and hyphens for *roff (and catman expects \- instead
895 if ($$self{IN_NAME}) {
900 # Do the collapse of the parse tree as described above.
901 return join ('', map {
912 $self->guesswork ($_);
917 # Takes a text block to perform guesswork on; this is guaranteed not to
918 # contain any formatting codes. Returns the text block with remapping done.
923 # rofficate backslashes.
926 # Ensure double underbars have a tiny space between them.
929 # Leave hyphens only if they're part of regular words and there is only
930 # one dash at a time. Leave a dash after the first character as a regular
931 # non-breaking dash, but don't let it mark the rest of the word invalid
935 ( (?:\G|^|\s) [a-zA-Z] ) ( \\- )?
936 ( (?: [a-zA-Z]+ \\-)+ )
937 ( [a-zA-Z]+ ) (?=\s|\Z)
940 my ($prefix, $hyphen, $main, $suffix) = ($1, $2, $3, $4);
943 $prefix . $hyphen . $main . $suffix;
946 # Translate -- into a real em dash if it's used like one.
947 s{ (\s) \\-\\- (\s) } { $1 . '\*(--' . $2 }egx;
948 s{ (\b[a-zA-Z]+) \\-\\- (\s|\Z|[a-zA-Z]+\b) } { $1 . '\*(--' . $2 }egx;
950 # Make all caps a little smaller. Be careful here, since we don't want to
951 # make @ARGV into small caps, nor do we want to fix the MIME in
952 # MIME-Version, since it looks weird with the full-height V.
954 ( ^ | [\s\(\"\'\`\[\{<>] )
955 ( [A-Z] [A-Z] (?: [/A-Z+:\d_\$&] | \\- )* )
956 (?= [\s>\}\]\(\)\'\".?!,;] | \\*\(-- | $ )
957 } { $1 . '\s-1' . $2 . '\s0' }egx;
959 # Italicize functions in the form func().
963 [A-Za-z_] ([:\w]|\\s-?[01])+ \(\)
965 } { $1 . '\f(IS' . $2 . '\f(IE' }egx;
967 # func(n) is a reference to a manual page. Make it \fIfunc\fR\|(n).
970 ( [A-Za-z_] (?:[.:\w]|\\-|\\s-?[01])+ )
974 } { $1 . '\f(IS' . $2 . '\f(IE\|' . $3 }egx;
976 # Convert simple Perl variable references to a fixed-width font.
981 } { $1 . '\f(FS' . $2 . '\f(FE'}egx;
983 # Fix up double quotes.
984 s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx;
986 # Make C++ into \*(C+, which is a squinched version.
987 s{ \b C\+\+ } {\\*\(C+}gx;
993 # Handles C<> text, deciding whether to put \*C` around it or not. This is a
994 # whole bunch of messy heuristics to try to avoid overquoting, originally from
995 # Barrie Slaymaker. This largely duplicates similar code in Pod::Text.
1000 # A regex that matches the portion of a variable reference that's the
1001 # array or hash index, separated out just because we want to use it in
1002 # several places in the following regex.
1003 my $index = '(?: \[.*\] | \{.*\} )?';
1005 # Check for things that we don't want to quote, and if we find any of
1006 # them, return the string with just a font change and no quoting.
1010 ( [\'\`\"] ) .* \1 # already quoted
1011 | \` .* \' # `quoted'
1012 | \$+ [\#^]? \S $index # special ($^Foo, $")
1013 | [\$\@%&*]+ \#? [:\'\w]+ $index # plain var or func
1014 | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
1015 | [+-]? [\d.]+ (?: [eE] [+-]? \d+ )? # a number
1016 | 0x [a-fA-F\d]+ # a hex constant
1019 }xo && return '\f(FS' . $_ . '\f(FE';
1021 # If we didn't return, go ahead and quote the text.
1022 return '\f(FS\*(C`' . $_ . "\\*(C'\\f(FE";
1026 ##############################################################################
1028 ##############################################################################
1030 # Make vertical whitespace.
1033 $self->output (".PD\n") if ($$self{ITEMS} > 1);
1035 $self->output ($$self{INDENT} > 0 ? ".Sp\n" : ".PP\n")
1036 if $$self{NEEDSPACE};
1039 # Output any pending index entries, and optionally an index entry given as an
1040 # argument. Support multiple index entries in X<> separated by slashes, and
1041 # strip special escapes from index entries.
1043 my ($self, $section, $index) = @_;
1044 my @entries = map { split m%\s*/\s*% } @{ $$self{INDEX} };
1045 return unless ($section || @entries);
1049 push (@output, [ 'Xref', join (' ', @entries) ]);
1052 $index =~ s/\\-/-/g;
1053 $index =~ s/\\(?:s-?\d|.\(..|.)//g;
1054 push (@output, [ $section, $index ]);
1057 my ($type, $entry) = @$_;
1058 $entry =~ s/\"/\"\"/g;
1059 $self->output (".IX $type " . '"' . $entry . '"' . "\n");
# Send a piece of text to the output device.  The destination is whatever
# filehandle the object's output_handle method returns; the text is printed
# to it as-is, and the result of print is passed back to the caller.
sub output {
    my ($self, $text) = @_;
    my $handle = $self->output_handle;
    print {$handle} $text;
}
1066 # Given a command and a single argument that may or may not contain double
1067 # quotes, handle double-quote formatting for it. If there are no double
1068 # quotes, just return the command followed by the argument in double quotes.
1069 # If there are double quotes, use an if statement to test for nroff, and for
1070 # nroff output the command followed by the argument in double quotes with
1071 # embedded double quotes doubled. For other formatters, remap paired double
1072 # quotes to LQUOTE and RQUOTE.
1075 my $command = shift;
1078 s/\\\*\([LR]\"/\"/g;
1080 # We also have to deal with \*C` and \*C', which are used to add the
1081 # quotes around C<> text, since they may expand to " and if they do this
1082 # confuses the .SH macros and the like no end. Expand them ourselves.
1083 # Also separate troff from nroff if there are any fixed-width fonts in use
1084 # to work around problems with Solaris nroff.
1085 my $c_is_quote = ($$self{LQUOTE} =~ /\"/) || ($$self{RQUOTE} =~ /\"/);
1086 my $fixedpat = join ('|', @{ $$self{FONTS} }{'100', '101', '110', '111'});
1087 $fixedpat =~ s/\\/\\\\/g;
1088 $fixedpat =~ s/\(/\\\(/g;
1089 if (/\"/ || /$fixedpat/) {
1093 $troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g;
1094 if ($c_is_quote && /\\\*\(C[\'\`]/) {
1095 $nroff =~ s/\\\*\(C\`/$$self{LQUOTE}/g;
1096 $nroff =~ s/\\\*\(C\'/$$self{RQUOTE}/g;
1097 $troff =~ s/\\\*\(C[\'\`]//g;
1099 $nroff = qq("$nroff") . ($extra ? " $extra" : '');
1100 $troff = qq("$troff") . ($extra ? " $extra" : '');
1102 # Work around the Solaris nroff bug where \f(CW\fP leaves the font set
1103 # to Roman rather than the actual previous font when used in headings.
1104 # troff output may still be broken, but at least we can fix nroff by
1105 # just switching the font changes to the non-fixed versions.
1106 $nroff =~ s/\Q$$self{FONTS}{100}\E(.*)\\f[PR]/$1/g;
1107 $nroff =~ s/\Q$$self{FONTS}{101}\E(.*)\\f([PR])/\\fI$1\\f$2/g;
1108 $nroff =~ s/\Q$$self{FONTS}{110}\E(.*)\\f([PR])/\\fB$1\\f$2/g;
1109 $nroff =~ s/\Q$$self{FONTS}{111}\E(.*)\\f([PR])/\\f\(BI$1\\f$2/g;
1111 # Now finally output the command. Only bother with .ie if the nroff
1112 # and troff output isn't the same.
1113 if ($nroff ne $troff) {
1114 return ".ie n $command $nroff\n.el $command $troff\n";
1116 return "$command $nroff\n";
1119 $_ = qq("$_") . ($extra ? " $extra" : '');
1120 return "$command $_\n";
1126 ##############################################################################
1128 ##############################################################################
1132 Pod::Man - Convert POD data to formatted *roff input
1137 my $parser = Pod::Man->new (release => $VERSION, section => 8);
1139 # Read POD from STDIN and write to STDOUT.
1140 $parser->parse_from_filehandle;
1142 # Read POD from file.pod and write to file.1.
1143 $parser->parse_from_file ('file.pod', 'file.1');
1147 Pod::Man is a module to convert documentation in the POD format (the
1148 preferred language for documenting Perl) into *roff input using the man
1149 macro set. The resulting *roff code is suitable for display on a terminal
1150 using L<nroff(1)>, normally via L<man(1)>, or printing using L<troff(1)>.
1151 It is conventionally invoked using the driver script B<pod2man>, but it can
1152 also be used directly.
1154 As a derived class from Pod::Parser, Pod::Man supports the same methods and
1155 interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
1156 new parser with C<< Pod::Man->new() >> and then calls either
1157 parse_from_filehandle() or parse_from_file().
1159 new() can take options, in the form of key/value pairs that control the
1160 behavior of the parser. See below for details.
1162 If no options are given, Pod::Man uses the name of the input file with any
1163 trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title, to
1164 section 1 unless the file ended in C<.pm> in which case it defaults to
1165 section 3, to a centered title of "User Contributed Perl Documentation", to
1166 a centered footer of the Perl version it is run with, and to a left-hand
1167 footer of the modification date of its input (or the current date if given
1170 Pod::Man assumes that your *roff formatters have a fixed-width font named
1171 CW. If yours is called something else (like CR), use the C<fixed> option to
1172 specify it. This generally only matters for troff output for printing.
1173 Similarly, you can set the fonts used for bold, italic, and bold italic
1176 Besides the obvious pod conversions, Pod::Man also takes care of formatting
1177 func(), func(3), and simple variable references like $foo or @bar so you
1178 don't have to use code escapes for them; complex expressions like
1179 C<$fred{'stuff'}> will still need to be escaped, though. It also translates
1180 dashes that aren't used as hyphens into en dashes, makes long dashes--like
1181 this--into proper em dashes, fixes "paired quotes," makes C++ look right,
1182 puts a little space between double underbars, makes ALLCAPS a teeny bit
1183 smaller in B<troff>, and escapes stuff that *roff treats as special so that
1186 The recognized options to new() are as follows. All options take a single
1193 Sets the centered page header to use instead of "User Contributed Perl
1198 Sets the left-hand footer. By default, the modification date of the input
1199 file will be used, or the current date if stat() can't find that file (the
1200 case if the input is from STDIN), and the date will be formatted as
1205 The fixed-width font to use for verbatim text and code. Defaults to CW.
1206 Some systems may want CR instead. Only matters for B<troff> output.
1210 Bold version of the fixed-width font. Defaults to CB. Only matters for
1215 Italic version of the fixed-width font (actually, something of a misnomer,
1216 since most fixed-width fonts only have an oblique version, not an italic
1217 version). Defaults to CI. Only matters for B<troff> output.
1219 =item fixedbolditalic
1221 Bold italic (probably actually oblique) version of the fixed-width font.
1222 Pod::Man doesn't assume you have this, and defaults to CB. Some systems
1223 (such as Solaris) have this font available as CX. Only matters for B<troff>
1228 Set the name of the manual page. Without this option, the manual name is
1229 set to the uppercased base name of the file being converted unless the
1230 manual section is 3, in which case the path is parsed to see if it is a Perl
1231 module path. If it is, a path like C<.../lib/Pod/Man.pm> is converted into
1232 a name like C<Pod::Man>. This option, if given, overrides any automatic
1233 determination of the name.
1237 Sets the quote marks used to surround CE<lt>> text. If the value is a
1238 single character, it is used as both the left and right quote; if it is two
1239 characters, the first character is used as the left quote and the second as
1240 the right quote; and if it is four characters, the first two are used as
1241 the left quote and the second two as the right quote.
1243 This may also be set to the special value C<none>, in which case no quote
1244 marks are added around CE<lt>> text (but the font is still changed for troff
1249 Set the centered footer. By default, this is the version of Perl you run
1250 Pod::Man under. Note that some systems' man macro sets assume that the
1251 centered footer will be a modification date and will prepend something like
1252 "Last modified: "; if this is the case, you may want to set C<release> to
1253 the last modified date and C<date> to the version number.
1257 Set the section for the C<.TH> macro. The standard section numbering
1258 convention is to use 1 for user commands, 2 for system calls, 3 for
1259 functions, 4 for devices, 5 for file formats, 6 for games, 7 for
1260 miscellaneous information, and 8 for administrator commands. There is a lot
1261 of variation here, however; some systems (like Solaris) use 4 for file
1262 formats, 5 for miscellaneous information, and 7 for devices. Still others
1263 use 1m instead of 8, or some mix of both. About the only section numbers
1264 that are reliably consistent are 1, 2, and 3.
1266 By default, section 1 will be used unless the file ends in .pm in which case
1267 section 3 will be selected.
1271 The standard Pod::Parser method parse_from_filehandle() takes up to two
1272 arguments, the first being the file handle to read POD from and the second
1273 being the file handle to write the formatted output to. The first defaults
1274 to STDIN if not given, and the second defaults to STDOUT. The method
1275 parse_from_file() is almost identical, except that its two arguments are the
1276 input and output disk files instead. See L<Pod::Parser> for the specific
1283 =item roff font should be 1 or 2 chars, not "%s"
1285 (F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that
1286 wasn't either one or two characters. Pod::Man doesn't support *roff fonts
1287 longer than two characters, although some *roff extensions do (the canonical
1288 versions of B<nroff> and B<troff> don't either).
1290 =item Invalid link %s
1292 (W) The POD source contained a C<LE<lt>E<gt>> formatting code that
1293 Pod::Man was unable to parse. You should never see this error message; it
1294 probably indicates a bug in Pod::Man.
1296 =item Invalid quote specification "%s"
1298 (F) The quote specification given (the quotes option to the constructor) was
1299 invalid. A quote specification must be one, two, or four characters long.
1301 =item %s:%d: Unknown command paragraph "%s".
1303 (W) The POD source contained a non-standard command paragraph (something of
1304 the form C<=command args>) that Pod::Man didn't know about. It was ignored.
1306 =item %s:%d: Unknown escape EE<lt>%sE<gt>
1308 (W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Man didn't
1309 know about. C<EE<lt>%sE<gt>> was printed verbatim in the output.
1311 =item %s:%d: Unknown formatting code %s
1313 (W) The POD source contained a non-standard formatting code (something of
1314 the form C<XE<lt>E<gt>>) that Pod::Man didn't know about. It was ignored.
1316 =item %s:%d: Unmatched =back
1318 (W) Pod::Man encountered a C<=back> command that didn't correspond to an
1325 Eight-bit input data isn't handled at all well at present. The correct
1326 approach would be to map EE<lt>E<gt> escapes to the appropriate UTF-8
1327 characters and then do a translation pass on the output according to the
1328 user-specified output character set. Unfortunately, we can't send eight-bit
1329 data directly to the output unless the user says this is okay, since some
1330 vendor *roff implementations can't handle eight-bit data. If the *roff
1331 implementation can, however, that's far superior to the current hacked
1332 characters that only work under troff.
1334 There is currently no way to turn off the guesswork that tries to format
1335 unmarked text appropriately, and sometimes it isn't wanted (particularly
1336 when using POD to document something other than Perl).
1338 The NAME section should be recognized specially and index entries emitted
1339 for everything in that section. This would have to be deferred until the
1340 next section, since extraneous things in NAME tend to confuse various man
1343 Pod::Man doesn't handle font names longer than two characters. Neither do
1344 most B<troff> implementations, but GNU troff does as an extension. It would
1345 be nice to support as an option for those who want to use it.
1347 The preamble added to each output file is rather verbose, and most of it is
1348 only necessary in the presence of EE<lt>E<gt> escapes for non-ASCII
1349 characters. It would ideally be nice if all of those definitions were only
1350 output if needed, perhaps on the fly as the characters are used.
1352 Pod::Man is excessively slow.
1356 The handling of hyphens and em dashes is somewhat fragile, and one may get
1357 the wrong one under some circumstances. This should only matter for
1360 When and whether to use small caps is somewhat tricky, and Pod::Man doesn't
1361 necessarily get it right.
1365 L<Pod::Parser>, L<perlpod(1)>, L<pod2man(1)>, L<nroff(1)>, L<troff(1)>,
1366 L<man(1)>, L<man(7)>
1368 Ossanna, Joseph F., and Brian W. Kernighan. "Troff User's Manual,"
1369 Computing Science Technical Report No. 54, AT&T Bell Laboratories. This is
1370 the best documentation of standard B<nroff> and B<troff>. At the time of
1371 this writing, it's available at
1372 L<http://www.cs.bell-labs.com/cm/cs/cstr.html>.
1374 The man page documenting the man macro set may be L<man(5)> instead of
1375 L<man(7)> on your system. Also, please see L<pod2man(1)> for extensive
1376 documentation on writing manual pages if you've not done it before and
1377 aren't familiar with the conventions.
1381 Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
1382 B<pod2man> by Tom Christiansen <tchrist@mox.perl.com>.
1384 =head1 COPYRIGHT AND LICENSE
1386 Copyright 1999, 2000, 2001, 2002 by Russ Allbery <rra@stanford.edu>.
1388 This program is free software; you may redistribute it and/or modify it
1389 under the same terms as Perl itself.