Upgrade to podlators 1.16.
[p5sagit/p5-mst-13.2.git] / lib / Pod / Man.pm
CommitLineData
9741dab0 1# Pod::Man -- Convert POD data to formatted *roff input.
b4558dc4 2# $Id: Man.pm,v 1.29 2001/11/26 08:35:15 eagle Exp $
9741dab0 3#
77003bb1 4# Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>
9741dab0 5#
3c014959 6# This program is free software; you may redistribute it and/or modify it
9741dab0 7# under the same terms as Perl itself.
8#
c9abbd5d 9# This module is intended to be a replacement for the pod2man script
10# distributed with versions of Perl prior to 5.6, and attempts to match its
11# output except for some specific circumstances where other decisions seemed
3c014959 12# to produce better output. It uses Pod::Parser and is designed to be easy to
13# subclass.
c9abbd5d 14#
15# Perl core hackers, please note that this module is also separately
16# maintained outside of the Perl core as part of the podlators. Please send
17# me any patches at the address above in addition to sending them to the
18# standard Perl mailing lists.
9741dab0 19
3c014959 20##############################################################################
9741dab0 21# Modules and declarations
3c014959 22##############################################################################
9741dab0 23
24package Pod::Man;
25
26require 5.004;
27
28use Carp qw(carp croak);
bf202ccd 29use Pod::ParseLink qw(parselink);
9741dab0 30use Pod::Parser ();
31
32use strict;
33use subs qw(makespace);
34use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION);
35
36@ISA = qw(Pod::Parser);
37
3c014959 38# Don't use the CVS revision as the version, since this module is also in Perl
39# core and too many things could munge CVS magic revision strings. This
40# number should ideally be the same as the CVS revision in podlators, however.
b4558dc4 41$VERSION = 1.29;
9741dab0 42
43
3c014959 44##############################################################################
9741dab0 45# Preamble and *roff output tables
3c014959 46##############################################################################
9741dab0 47
48# The following is the static preamble which starts all *roff output we
49# generate. It's completely static except for the font to use as a
ab1f1d91 50# fixed-width font, which is designated by @CFONT@, and the left and right
3c014959 51# quotes to use for C<> text, designated by @LQUOTE@ and @RQUOTE@. $PREAMBLE
52# should therefore be run through s/\@CFONT\@/<font>/g before output.
9741dab0 53$PREAMBLE = <<'----END OF PREAMBLE----';
54.de Sh \" Subsection heading
55.br
56.if t .Sp
57.ne 5
58.PP
59\fB\\$1\fR
60.PP
61..
62.de Sp \" Vertical space (when we can't use .PP)
63.if t .sp .5v
64.if n .sp
65..
9741dab0 66.de Vb \" Begin verbatim text
67.ft @CFONT@
68.nf
69.ne \\$1
70..
71.de Ve \" End verbatim text
72.ft R
73
74.fi
75..
76.\" Set up some character translations and predefined strings. \*(-- will
77.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
78.\" double quote, and \*(R" will give a right double quote. | will give a
3c014959 79.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used to
80.\" do unbreakable dashes and therefore won't be available. \*(C` and \*(C'
81.\" expand to `' in nroff, nothing in troff, for use with C<>.
9741dab0 82.tr \(*W-|\(bv\*(Tr
83.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
84.ie n \{\
85. ds -- \(*W-
86. ds PI pi
87. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
88. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
89. ds L" ""
90. ds R" ""
ab1f1d91 91. ds C` @LQUOTE@
92. ds C' @RQUOTE@
9741dab0 93'br\}
94.el\{\
95. ds -- \|\(em\|
96. ds PI \(*p
97. ds L" ``
98. ds R" ''
99'br\}
100.\"
3c014959 101.\" If the F register is turned on, we'll generate index entries on stderr for
102.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
103.\" entries marked with X<> in POD. Of course, you'll have to process the
104.\" output yourself in some meaningful fashion.
9741dab0 105.if \nF \{\
106. de IX
107. tm Index:\\$1\t\\n%\t"\\$2"
f3248e50 108..
9741dab0 109. nr % 0
110. rr F
111.\}
112.\"
3c014959 113.\" For nroff, turn off justification. Always turn off hyphenation; it makes
114.\" way too many mistakes in technical documents.
9741dab0 115.hy 0
116.if n .na
117.\"
118.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
119.\" Fear. Run. Save yourself. No user-serviceable parts.
9741dab0 120. \" fudge factors for nroff and troff
121.if n \{\
122. ds #H 0
123. ds #V .8m
124. ds #F .3m
125. ds #[ \f1
126. ds #] \fP
127.\}
128.if t \{\
129. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
130. ds #V .6m
131. ds #F 0
132. ds #[ \&
133. ds #] \&
134.\}
135. \" simple accents for nroff and troff
136.if n \{\
137. ds ' \&
138. ds ` \&
139. ds ^ \&
140. ds , \&
141. ds ~ ~
142. ds /
143.\}
144.if t \{\
145. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
146. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
147. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
148. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
149. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
150. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
151.\}
152. \" troff and (daisy-wheel) nroff accents
153.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
154.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
155.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
156.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
157.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
158.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
159.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
160.ds ae a\h'-(\w'a'u*4/10)'e
161.ds Ae A\h'-(\w'A'u*4/10)'E
162. \" corrections for vroff
163.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
164.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
165. \" for low resolution devices (crt and lpr)
166.if \n(.H>23 .if \n(.V>19 \
167\{\
168. ds : e
169. ds 8 ss
170. ds o a
171. ds d- d\h'-1'\(ga
172. ds D- D\h'-1'\(hy
173. ds th \o'bp'
174. ds Th \o'LP'
175. ds ae ae
176. ds Ae AE
177.\}
178.rm #[ #] #H #V #F C
179----END OF PREAMBLE----
5cdeb5a2 180#`# for cperl-mode
181
# This table is taken nearly verbatim from Tom Christiansen's pod2man. It
# maps the name used inside E<> to the *roff text that renders it, and it
# assumes that the standard preamble has already been printed, since that's
# what defines all of the accent mark strings referenced here (\*', \*`,
# \*(Ae, \*(ae, and so on). Some values are quoted with double quotes since
# they contain embedded single quotes, so \\ is used uniformly for backslash
# for readability.
%ESCAPES = (
    'amp'       =>    '&',            # ampersand
    'apos'      =>    "'",            # apostrophe
    'lt'        =>    '<',            # left chevron, less-than
    'gt'        =>    '>',            # right chevron, greater-than
    'quot'      =>    '"',            # double quote
    'sol'       =>    '/',            # solidus (forward slash)
    'verbar'    =>    '|',            # vertical bar

    'Aacute'    =>    "A\\*'",        # capital A, acute accent
    'aacute'    =>    "a\\*'",        # small a, acute accent
    'Acirc'     =>    'A\\*^',        # capital A, circumflex accent
    'acirc'     =>    'a\\*^',        # small a, circumflex accent
    # The preamble defines the ligature strings as "Ae" and "ae"; there is no
    # "AE" string, so the capital ligature has to reference \*(Ae.
    'AElig'     =>    '\\*(Ae',       # capital AE diphthong (ligature)
    'aelig'     =>    '\\*(ae',       # small ae diphthong (ligature)
    'Agrave'    =>    "A\\*`",        # capital A, grave accent
    'agrave'    =>    "a\\*`",        # small a, grave accent
    'Aring'     =>    'A\\*o',        # capital A, ring
    'aring'     =>    'a\\*o',        # small a, ring
    'Atilde'    =>    'A\\*~',        # capital A, tilde
    'atilde'    =>    'a\\*~',        # small a, tilde
    'Auml'      =>    'A\\*:',        # capital A, dieresis or umlaut mark
    'auml'      =>    'a\\*:',        # small a, dieresis or umlaut mark
    'Ccedil'    =>    'C\\*,',        # capital C, cedilla
    'ccedil'    =>    'c\\*,',        # small c, cedilla
    'Eacute'    =>    "E\\*'",        # capital E, acute accent
    'eacute'    =>    "e\\*'",        # small e, acute accent
    'Ecirc'     =>    'E\\*^',        # capital E, circumflex accent
    'ecirc'     =>    'e\\*^',        # small e, circumflex accent
    'Egrave'    =>    'E\\*`',        # capital E, grave accent
    'egrave'    =>    'e\\*`',        # small e, grave accent
    'ETH'       =>    '\\*(D-',       # capital Eth, Icelandic
    'eth'       =>    '\\*(d-',       # small eth, Icelandic
    'Euml'      =>    'E\\*:',        # capital E, dieresis or umlaut mark
    'euml'      =>    'e\\*:',        # small e, dieresis or umlaut mark
    'Iacute'    =>    "I\\*'",        # capital I, acute accent
    'iacute'    =>    "i\\*'",        # small i, acute accent
    'Icirc'     =>    'I\\*^',        # capital I, circumflex accent
    'icirc'     =>    'i\\*^',        # small i, circumflex accent
    'Igrave'    =>    'I\\*`',        # capital I, grave accent
    'igrave'    =>    'i\\*`',        # small i, grave accent
    'Iuml'      =>    'I\\*:',        # capital I, dieresis or umlaut mark
    'iuml'      =>    'i\\*:',        # small i, dieresis or umlaut mark
    'Ntilde'    =>    'N\\*~',        # capital N, tilde
    'ntilde'    =>    'n\\*~',        # small n, tilde
    'Oacute'    =>    "O\\*'",        # capital O, acute accent
    'oacute'    =>    "o\\*'",        # small o, acute accent
    'Ocirc'     =>    'O\\*^',        # capital O, circumflex accent
    'ocirc'     =>    'o\\*^',        # small o, circumflex accent
    'Ograve'    =>    'O\\*`',        # capital O, grave accent
    'ograve'    =>    'o\\*`',        # small o, grave accent
    'Oslash'    =>    'O\\*/',        # capital O, slash
    'oslash'    =>    'o\\*/',        # small o, slash
    'Otilde'    =>    'O\\*~',        # capital O, tilde
    'otilde'    =>    'o\\*~',        # small o, tilde
    'Ouml'      =>    'O\\*:',        # capital O, dieresis or umlaut mark
    'ouml'      =>    'o\\*:',        # small o, dieresis or umlaut mark
    'szlig'     =>    '\\*8',         # small sharp s, German (sz ligature)
    'THORN'     =>    '\\*(Th',       # capital THORN, Icelandic
    'thorn'     =>    '\\*(th',       # small thorn, Icelandic
    'Uacute'    =>    "U\\*'",        # capital U, acute accent
    'uacute'    =>    "u\\*'",        # small u, acute accent
    'Ucirc'     =>    'U\\*^',        # capital U, circumflex accent
    'ucirc'     =>    'u\\*^',        # small u, circumflex accent
    'Ugrave'    =>    'U\\*`',        # capital U, grave accent
    'ugrave'    =>    'u\\*`',        # small u, grave accent
    'Uuml'      =>    'U\\*:',        # capital U, dieresis or umlaut mark
    'uuml'      =>    'u\\*:',        # small u, dieresis or umlaut mark
    'Yacute'    =>    "Y\\*'",        # capital Y, acute accent
    'yacute'    =>    "y\\*'",        # small y, acute accent
    'yuml'      =>    'y\\*:',        # small y, dieresis or umlaut mark

    'nbsp'      =>    '\\ ',          # non-breaking space
    'shy'       =>    '',             # soft (discretionary) hyphen
);
262
263
3c014959 264##############################################################################
9741dab0 265# Static helper functions
3c014959 266##############################################################################
9741dab0 267
# Guard text against being mistaken for *roff requests. Any line that starts
# with a period or a single quote would be read as a command, and a line that
# starts with a backslash could expand to or hide one, so each such line gets
# the zero-width \& escape put in front of it. This is overkill, but it's much
# simpler than trying to parse *roff here. Takes the text and returns the
# protected copy.
sub protect {
    my $text = shift;
    $text =~ s/^(?=[.'\\])/\\&/mg;
    return $text;
}
5cdeb5a2 277
# Translate a font name into the *roff escape that selects it: \fX for a
# one-character name and \f(XX for anything longer.
sub toescape {
    my $font = $_[0];
    return '\f(' . $font if length ($font) > 1;
    return '\f' . $font;
}
280
5cdeb5a2 281
3c014959 282##############################################################################
9741dab0 283# Initialization
3c014959 284##############################################################################
285
# Initialize the object. This is where the options passed to the constructor
# are checked and where defaults are filled in for the ones that were not
# given: the four fixed-width fonts, center (the centered page title), indent,
# release (the version string), and quotes (the marks put around C<> text).
# Note that we can't know what file name we're processing due to the
# architecture of Pod::Parser, so that *has* to either be passed to the
# constructor or set separately with Pod::Man::name().
sub initialize {
    my $self = shift;

    # User-supplied fixed-width fonts must be one or two characters long.
    # Fonts that were not supplied start out empty so that the defaults below
    # apply to them.
    foreach my $option (qw(fixed fixedbold fixeditalic fixedbolditalic)) {
        unless (defined $self->{$option}) {
            $self->{$option} = '';
            next;
        }
        my $length = length $self->{$option};
        if ($length < 1 || $length > 2) {
            croak qq(roff font should be 1 or 2 chars,)
                . qq( not "$self->{$option}");
        }
    }

    # Default fonts. We can't be sure what fixed bold-italic is going to be
    # called, so default to just bold.
    my %default = (fixed           => 'CW',
                   fixedbold       => 'CB',
                   fixeditalic     => 'CI',
                   fixedbolditalic => 'CB');
    foreach my $option (keys %default) {
        $self->{$option} ||= $default{$option};
    }

    # Table of font escapes, keyed by three flags: the first digit is
    # fixed-width, the second is bold, and the third is italic.
    $self->{FONTS} = {
        '000' => '\fR',
        '001' => '\fI',
        '010' => '\fB',
        '011' => '\f(BI',
        '100' => toescape ($self->{fixed}),
        '101' => toescape ($self->{fixeditalic}),
        '110' => toescape ($self->{fixedbold}),
        '111' => toescape ($self->{fixedbolditalic}),
    };

    # Extra stuff for page titles.
    unless (defined $self->{center}) {
        $self->{center} = 'User Contributed Perl Documentation';
    }
    unless (defined $self->{indent}) {
        $self->{indent} = 4;
    }

    # Take the release from the running Perl. $] is split into version,
    # three-digit patchlevel, and whatever follows as the subversion; the
    # subversion is scaled as if it had been padded on the right to three
    # digits, which handles both the pre-5.6 and the post-5.6 numbering
    # schemes. Adding zero strips the leading zeroes.
    unless (defined $self->{release}) {
        my ($major, $minor, $sub) = ($] =~ /^(\d+)\.(\d{3})(\d{0,3})$/);
        $sub ||= 0;
        $sub *= 10 ** (3 - length $sub);
        my @parts = map { $_ + 0 } ($major, $minor, $sub);
        $self->{release} = 'perl v' . join ('.', @parts);
    }

    # Double quotes in things that will be quoted.
    foreach my $field (qw(center date release)) {
        next unless $self->{$field};
        $self->{$field} =~ s/\"/\"\"/g;
    }

    # Figure out what quotes we'll be using for C<> text: 'none', a single
    # character used on both sides, or two or four characters split evenly
    # into a left and a right mark.
    $self->{quotes} ||= '"';
    my $quotes = $self->{quotes};
    if ($quotes eq 'none') {
        $self->{LQUOTE} = $self->{RQUOTE} = '';
    } elsif (length ($quotes) == 1) {
        $self->{LQUOTE} = $self->{RQUOTE} = $quotes;
    } elsif ($quotes =~ /^(.)(.)$/ || $quotes =~ /^(..)(..)$/) {
        ($self->{LQUOTE}, $self->{RQUOTE}) = ($1, $2);
    } else {
        croak qq(Invalid quote specification "$self->{quotes}");
    }

    # Double the first quote only; this must not be s///g since two double
    # quotes are represented in *roff as three double quotes, not four.
    foreach my $side (qw(LQUOTE RQUOTE)) {
        $self->{$side} =~ s/\"/\"\"/;
    }

    return $self->SUPER::initialize;
}
368
# For each document we process, output the preamble first. Works out the page
# name and section (from the name and section options, or failing that from
# the input file name), works out the date (from the date option, or failing
# that from the modification time of the input, or failing that from the
# current time), prints the header, preamble, and .TH line to the output
# handle, and resets the per-file state used by the rest of the formatter.
sub begin_pod {
    my $self = shift;

    # Try to figure out the name and section from the file name.
    my $section = $$self{section} || 1;
    my $name = $$self{name};
    if (!defined $name) {
        $name = $self->input_file;
        $section = 3 if (!$$self{section} && $name =~ /\.pm\z/i);
        $name =~ s/\.p(od|[lm])\z//i;
        if ($section !~ /^3/) {
            require File::Basename;
            $name = uc File::Basename::basename ($name);
        } else {
            # Assume that we're dealing with a module. We want to figure out
            # the full module name from the path to the file, but we don't
            # want to include too much of the path into the module name. Lose
            # everything up to the first of:
            #
            #     */lib/*perl*/         standard or site_perl module
            #     */*perl*/lib/         from -Dprefix=/opt/perl
            #     */*perl*/             random module hierarchy
            #
            # which works. Also strip off a leading site or site_perl
            # component, any OS-specific component, and any version number
            # component, and strip off an initial component of "lib" or
            # "blib/lib" since that's what ExtUtils::MakeMaker creates.
            # splitdir requires at least File::Spec 0.8.
            require File::Spec;
            my ($volume, $dirs, $file) = File::Spec->splitpath ($name);
            my @dirs = File::Spec->splitdir ($dirs);
            my $cut = 0;
            my $i;
            for ($i = 0; $i < scalar @dirs; $i++) {
                # The last component has no successor; treat that as an empty
                # name rather than matching against an undefined value.
                my $next = defined $dirs[$i + 1] ? $dirs[$i + 1] : '';
                if ($dirs[$i] eq 'lib' && $next =~ /perl/) {
                    $cut = $i + 2;
                    last;
                } elsif ($dirs[$i] =~ /perl/) {
                    $cut = $i + 1;
                    $cut++ if $next eq 'lib';
                    last;
                }
            }

            # Every test below is guarded against @dirs having run out, and
            # the OS name is quoted so that it's matched literally.
            if ($cut > 0) {
                my $os = quotemeta $^O;
                splice (@dirs, 0, $cut);
                shift @dirs if (@dirs && $dirs[0] =~ /^site(_perl)?$/);
                shift @dirs if (@dirs && $dirs[0] =~ /^[\d.]+$/);
                shift @dirs
                    if (@dirs && $dirs[0] =~ /^(?:.*-$os|$os-.*|$os)$/);
            }
            shift @dirs if (@dirs && $dirs[0] eq 'lib');
            splice (@dirs, 0, 2)
                if (@dirs > 1 && $dirs[0] eq 'blib' && $dirs[1] eq 'lib');

            # Remove empty directories when building the module name; they
            # occur too easily on Unix by doubling slashes. Test the length
            # rather than the truth so that a directory named "0" survives.
            my @parts = grep { defined $_ && length $_ } @dirs;
            $name = join ('::', @parts, $file);
        }
    }

    # If $name contains spaces, quote it; this mostly comes up in the case of
    # input from stdin.
    $name = '"' . $name . '"' if ($name =~ /\s/);

    # Modification date header. Try to use the modification time of our
    # input, falling back on the current time if stat gives us nothing.
    if (!defined $$self{date}) {
        my $time = (stat $self->input_file)[9] || time;
        my ($day, $month, $year) = (localtime $time)[3,4,5];
        $month++;
        $year += 1900;
        $$self{date} = sprintf ('%4d-%02d-%02d', $year, $month, $day);
    }

    # Now, print out the preamble and the title. The meaning of the arguments
    # to .TH unfortunately vary by system; some systems consider the fourth
    # argument to be a "source" and others use it as a version number.
    # Generally it's just presented as the left-side footer, though, so it
    # doesn't matter too much if a particular system gives it another
    # interpretation.
    #
    # The order of date and release used to be reversed in older versions of
    # this module, but this order is correct for both Solaris and Linux.
    #
    # The placeholders are replaced globally so that the preamble is free to
    # mention any of them more than once.
    local $_ = $PREAMBLE;
    s/\@CFONT\@/$$self{fixed}/g;
    s/\@LQUOTE\@/$$self{LQUOTE}/g;
    s/\@RQUOTE\@/$$self{RQUOTE}/g;
    chomp $_;
    my $pversion = $Pod::Parser::VERSION;
    print { $self->output_handle } <<"----END OF HEADER----";
.\\" Automatically generated by Pod::Man v$VERSION, Pod::Parser v$pversion
.\\"
.\\" Standard preamble:
.\\" ========================================================================
$_
.\\" ========================================================================
.\\"
.IX Title "$name $section"
.TH $name $section "$$self{date}" "$$self{release}" "$$self{center}"
.UC
----END OF HEADER----

    # Initialize a few per-file variables.
    $$self{INDENT}    = 0;      # Current indentation level.
    $$self{INDENTS}   = [];     # Stack of indentations.
    $$self{INDEX}     = [];     # Index keys waiting to be printed.
    $$self{ITEMS}     = 0;      # The number of consecutive =items.
    $$self{SHIFTWAIT} = 0;      # Whether there is a shift waiting.
    $$self{SHIFTS}    = [];     # Stack of .RS shifts.
}
478
479
3c014959 480##############################################################################
9741dab0 481# Core overrides
3c014959 482##############################################################################
9741dab0 483
# Called for each command paragraph. Gets the command, the associated
# paragraph, the line number, and a Pod::Paragraph object. =pod is ignored,
# and so is everything except =end while we're inside an excluded =begin
# block. Otherwise the command is dispatched to the method named cmd_<command>
# with the remaining arguments; if there is no such method, a warning giving
# the file and line is printed instead. =cut is handled internally by
# Pod::Parser.
sub command {
    my $self = shift;
    my $command = shift;
    return if $command eq 'pod';
    return if ($self->{EXCLUDE} && $command ne 'end');

    # Known commands are handed straight to their handler.
    my $handler = 'cmd_' . $command;
    return $self->$handler (@_) if $self->can ($handler);

    # Unknown command. Report the location recorded in the paragraph object
    # rather than the line number we were passed.
    my ($text, undef, $paragraph) = @_;
    my ($file, $line) = $paragraph->file_line;
    $text =~ s/\n+\z//;
    $text = " $text" if ($text =~ /^\S/);
    warn qq($file:$line: Unknown command paragraph "=$command$text"\n);
    return;
}
506
# Called for a verbatim paragraph. Gets the paragraph, the line number, and a
# Pod::Paragraph object. Paragraphs that are nothing but whitespace are
# dropped. Otherwise trailing whitespace is reduced to a single newline, tabs
# are expanded to eight-column stops, backslashes are rofficated, a zero-width
# character is put in front of the first non-blank character of each line to
# protect against commands, and the result is wrapped in .Vb/.Ve with the
# number of lines as the .Vb argument.
sub verbatim {
    my $self = shift;
    return if $self->{EXCLUDE};
    my $text = shift;
    return if $text =~ /^\s+$/;
    $text =~ s/\s+$/\n/;
    my $lines = ($text =~ tr/\n//);

    # Untabify: replace each run of tabs with enough spaces to reach the next
    # multiple of eight columns, one run at a time until none are left.
    1 while $text =~ s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;

    $text =~ s/\\/\\e/g;
    $text =~ s/^(\s*\S)/'\&' . $1/gme;
    $self->makespace;
    $self->output (".Vb $lines\n$text.Ve\n");
    $self->{NEEDSPACE} = 0;
}
525
# Called for a regular text block. Gets the paragraph, the line number, and a
# Pod::Paragraph object. Inside an excluded block nothing is output; inside a
# =begin man or =begin roff block the paragraph is output untouched.
# Otherwise perform interpolation and output the results.
sub textblock {
    my $self = shift;
    return if $self->{EXCLUDE};
    if ($self->{VERBATIM}) {
        $self->output ($_[0]);
        return;
    }

    # Parse the tree. collapse knows about references to scalars as well as
    # scalars and does the right thing with them. Tidy up any trailing
    # whitespace.
    my $text = $self->parse (@_);
    $text =~ s/\n\s*$/\n/;

    # Output the paragraph. We also have to handle =over without =item. If
    # text arrives while SHIFTWAIT is still set, create the indent here with
    # .RS and record it in SHIFTS so that it can be closed again later.
    $self->makespace;
    if ($self->{SHIFTWAIT}) {
        $self->output (".RS $self->{INDENT}\n");
        push (@{ $self->{SHIFTS} }, $self->{INDENT});
        $self->{SHIFTWAIT} = 0;
    }
    $self->output (protect ($self->textmapfonts ($text)));
    $self->outindex;
    $self->{NEEDSPACE} = 1;
}
554
# Called for an interior sequence. Takes a Pod::InteriorSequence object and
# returns either a plain string or a reference to a scalar blessed into
# Pod::Man::String. The scalar is the final formatted text; it's returned as
# a reference so that other interior sequences above us know that the text
# has already been processed. Unknown E<> escapes and unknown sequences
# produce a warning giving the file and line.
sub sequence {
    my ($self, $seq) = @_;
    my $command = $seq->cmd_name;

    # We have to defer processing of the inside of an L<> formatting code. If
    # this sequence is nested inside an L<> sequence, return the literal raw
    # text of it.
    my $parent = $seq->nested;
    while (defined $parent) {
        return $seq->raw_text if ($parent->cmd_name eq 'L');
        $parent = $parent->nested;
    }

    # Zero-width characters.
    if ($command eq 'Z') {
        # Workaround to generate a blessable reference, needed by 5.005.
        my $tmp = '\&';
        return bless \ "$tmp", 'Pod::Man::String';
    }

    # C<>, L<>, X<>, and E<> don't apply guesswork to their contents. C<>
    # needs some additional special handling, which is signalled to collapse
    # by a literal value of 2 rather than 1.
    my $literal = ($command =~ /^[CELX]$/);
    $literal++ if $command eq 'C';
    local $_ = $self->collapse ($seq->parse_tree, $literal);

    # Handle E<> escapes. Numeric escapes that match one of the supported ISO
    # 8859-1 characters don't work at present.
    if ($command eq 'E') {
        if (/^\d+$/) {
            return bless \ chr ($_), 'Pod::Man::String';
        } elsif (exists $ESCAPES{$_}) {
            return bless \ "$ESCAPES{$_}", 'Pod::Man::String';
        } else {
            my ($file, $line) = $seq->file_line;
            warn "$file:$line: Unknown escape E<$_>\n";
            return bless \ "E<$_>", 'Pod::Man::String';
        }
    }

    # For all the other sequences, empty content produces no output.
    return '' if $_ eq '';

    # Handle formatting sequences. The \f(xS and \f(xE markers are turned
    # into real font changes later by mapfonts or textmapfonts.
    if ($command eq 'B') {
        return bless \ ('\f(BS' . $_ . '\f(BE'), 'Pod::Man::String';
    } elsif ($command eq 'F') {
        return bless \ ('\f(IS' . $_ . '\f(IE'), 'Pod::Man::String';
    } elsif ($command eq 'I') {
        return bless \ ('\f(IS' . $_ . '\f(IE'), 'Pod::Man::String';
    } elsif ($command eq 'C') {
        # A bug in lvalue subs in 5.6 requires the temporary variable.
        my $tmp = $self->quote_literal ($_);
        return bless \ "$tmp", 'Pod::Man::String';
    }

    # Handle links. Only the text to display and the link type are used.
    if ($command eq 'L') {
        my ($text, $type) = (parselink ($_))[1,4];
        return '' unless $text;
        my ($file, $line) = $seq->file_line;
        $text = $self->parse ($text, $line);
        $text = '<' . $text . '>' if $type eq 'url';
        return bless \ "$text", 'Pod::Man::String';
    }

    # Whitespace protection replaces whitespace with "\ ".
    if ($command eq 'S') {
        s/\s+/\\ /g;
        return bless \ "$_", 'Pod::Man::String';
    }

    # Add an index entry to the list of ones waiting to be output.
    if ($command eq 'X') { push (@{ $$self{INDEX} }, $_); return '' }

    # Anything else is unknown. Warn, then hand back the contents as already
    # processed text. Falling off the end here would instead return the true
    # value of warn to the parser as the replacement for the sequence.
    my ($file, $line) = $seq->file_line;
    warn "$file:$line: Unknown sequence $command<$_>\n";
    return bless \ "$_", 'Pod::Man::String';
}
638
639
3c014959 640##############################################################################
9741dab0 641# Command paragraphs
3c014959 642##############################################################################
9741dab0 643
644# All command paragraphs take the paragraph and the line number.
645
# First level heading, output with .SH. We can't output .IX in the NAME
# section due to a bug in some versions of catman, so no index entry is
# emitted for a heading that is exactly "NAME". .SH already uses small caps,
# so the \sN size-change escapes are stripped from the heading text. Newlines
# in the heading are folded into single spaces.
sub cmd_head1 {
    my $self = shift;
    my $heading = $self->parse (@_);
    $heading =~ s/\s+$//;
    $heading =~ s/\\s-?\d//g;
    $heading =~ s/\s*\n\s*/ /g;

    # Close off a run of consecutive =items before starting the heading.
    if ($self->{ITEMS} > 1) {
        $self->{ITEMS} = 0;
        $self->output (".PD\n");
    }
    $self->output ($self->switchquotes ('.SH', $self->mapfonts ($heading)));
    $self->outindex ($heading eq 'NAME' ? () : ('Header', $heading));
    $self->{NEEDSPACE} = 0;
}
663
# Second level heading, output with the .Sh macro and indexed as a
# Subsection. Newlines in the heading are folded into single spaces.
sub cmd_head2 {
    my $self = shift;
    my $heading = $self->parse (@_);
    $heading =~ s/\s+$//;
    $heading =~ s/\s*\n\s*/ /g;

    # Close off a run of consecutive =items before starting the heading.
    if ($self->{ITEMS} > 1) {
        $self->{ITEMS} = 0;
        $self->output (".PD\n");
    }
    $self->output ($self->switchquotes ('.Sh', $self->mapfonts ($heading)));
    $self->outindex ('Subsection', $heading);
    $self->{NEEDSPACE} = 0;
}
678
# Third level heading. There is no macro for this one; the heading is output
# as an ordinary line of text wrapped in the italic start and end markers,
# and indexed as a Subsection.
sub cmd_head3 {
    my $self = shift;
    my $heading = $self->parse (@_);
    $heading =~ s/\s+$//;
    $heading =~ s/\s*\n\s*/ /g;

    # Close off a run of consecutive =items before starting the heading.
    if ($self->{ITEMS} > 1) {
        $self->{ITEMS} = 0;
        $self->output (".PD\n");
    }
    $self->makespace;
    my $marked = '\f(IS' . $heading . '\f(IE';
    $self->output ($self->textmapfonts ($marked) . "\n");
    $self->outindex ('Subsection', $heading);
    $self->{NEEDSPACE} = 1;
}
694
# Fourth level heading. Output as an ordinary line of text with no extra
# font applied, and indexed as a Subsection.
sub cmd_head4 {
    my $self = shift;
    my $heading = $self->parse (@_);
    $heading =~ s/\s+$//;
    $heading =~ s/\s*\n\s*/ /g;

    # Close off a run of consecutive =items before starting the heading.
    if ($self->{ITEMS} > 1) {
        $self->{ITEMS} = 0;
        $self->output (".PD\n");
    }
    $self->makespace;
    $self->output ($self->textmapfonts ($heading) . "\n");
    $self->outindex ('Subsection', $heading);
    $self->{NEEDSPACE} = 1;
}
710
# Start a list. The argument is the indentation to use; anything that isn't
# an optionally signed integer followed by whitespace is replaced by the
# default indent. For indents after the first, wrap the outside indent in .RS
# so that hanging paragraph tags will be correct. The previous indentation is
# pushed onto INDENTS and SHIFTWAIT is set.
sub cmd_over {
    my $self = shift;
    my $width = shift;
    $width = $self->{indent} unless $width =~ /^[-+]?\d+\s+$/;

    my $indents = $self->{INDENTS};
    my $shifts = $self->{SHIFTS};
    if (scalar (@$shifts) < scalar (@$indents)) {
        $self->output (".RS $self->{INDENT}\n");
        push (@$shifts, $self->{INDENT});
    }
    push (@$indents, $self->{INDENT});
    $self->{INDENT} = $width + 0;
    $self->{SHIFTWAIT} = 1;
}
725
# End a list. Restores the indentation saved by the matching =over, warning
# about an unmatched =back (and falling back to no indentation) if there
# wasn't one. If there are more open .RS shifts than enclosing lists, the
# innermost shift is closed. If we're still inside an outer list, we've
# mangled its hanging paragraph indent, so it's re-established with .RE
# followed by .RS at the restored indentation.
sub cmd_back {
    my $self = shift;
    my $indents = $self->{INDENTS};
    $self->{INDENT} = pop @$indents;
    if (!defined $self->{INDENT}) {
        # The third remaining argument is the Pod::Paragraph object, which
        # knows where in the input we are.
        my $paragraph = $_[2];
        my ($file, $line) = $paragraph->file_line;
        warn "$file:$line: Unmatched =back\n";
        $self->{INDENT} = 0;
    }
    my $shifts = $self->{SHIFTS};
    if (scalar (@$shifts) > scalar (@$indents)) {
        $self->output (".RE\n");
        pop @$shifts;
    }
    if (scalar (@$indents) > 0) {
        $self->output (".RE\n");
        $self->output (".RS $self->{INDENT}\n");
    }
    $self->{NEEDSPACE} = 1;
    $self->{SHIFTWAIT} = 0;
}
749
# An individual list item, output with .IP. Emit an index entry for anything
# that's interesting, but don't emit index entries for things like bullets
# and numbers. rofficate bullets too while we're at it (so for nice output,
# use * for your lists rather than o or . or - or some other thing). Newlines
# in an item title are turned into spaces since *roff can't handle them
# embedded. An item with no text at all is treated as a bullet.
sub cmd_item {
    my $self = shift;
    local $_ = $self->parse (@_);
    s/\s+$//;
    s/\s*\n\s*/ /g;

    # Only tags containing a word character are indexed, and not ones that
    # are a single such character followed by . or ). A leading bullet
    # character is removed from the index text.
    my $index;
    if (/\w/ && !/^\w[.\)]\s*$/) {
        $index = $_;
        $index =~ s/^\s*[-*+o.]?(?:\s+|\Z)//;
    }

    # Test the length rather than the truth of the tag so that "=item 0"
    # keeps its text instead of being turned into a bullet.
    $_ = '*' unless length ($_) > 0;
    s/^\*(\s|\Z)/\\\(bu$1/;
    if (@{ $$self{SHIFTS} } == @{ $$self{INDENTS} }) {
        $self->output (".RE\n");
        pop @{ $$self{SHIFTS} };
    }
    $_ = $self->textmapfonts ($_);

    # Starting with the second consecutive item, turn off the space between
    # paragraphs.
    $self->output (".PD 0\n") if ($$self{ITEMS} == 1);
    $self->output ($self->switchquotes ('.IP', $_, $$self{INDENT}));

    # Likewise test the length of the index text so that an item of "0" still
    # gets its index entry.
    my $indexed = (defined ($index) && length ($index) > 0);
    $self->outindex ($indexed ? ('Item', $index) : ());
    $$self{NEEDSPACE} = 0;
    $$self{ITEMS}++;
    $$self{SHIFTWAIT} = 0;
}
779
# Begin a block for a particular translator. The first word of the paragraph
# names the format: for man or roff VERBATIM is set, which triggers special
# handling in textblock(); for anything else EXCLUDE is set. Does nothing if
# the paragraph doesn't start with a word.
sub cmd_begin {
    my $self = shift;
    my $spec = shift;
    return unless $spec =~ /^(\S+)/;
    my $flag = ($1 eq 'man' || $1 eq 'roff') ? 'VERBATIM' : 'EXCLUDE';
    $self->{$flag} = 1;
}
792
# End a block for a particular translator by clearing both the EXCLUDE and
# the VERBATIM flags. We assume that all =begin/=end pairs are properly
# closed.
sub cmd_end {
    my $self = shift;
    $self->{EXCLUDE} = $self->{VERBATIM} = 0;
}
800
# One paragraph for a particular translator. Ignore it unless it's intended
# for man or roff, in which case the format name (and the rest of its line,
# if that's only blanks) is removed and what remains is output verbatim.
sub cmd_for {
    my $self = shift;
    my $text = shift;
    return unless $text =~ s/^(?:man|roff)\b[ \t]*\n?//;
    $self->output ($text);
}
809
810
3c014959 811##############################################################################
9741dab0 812# Escaping and fontification
3c014959 813##############################################################################
9741dab0 814
# At this point, we'll have embedded font codes of the form \f(<font>[SE]
# where <font> is one of B, I, or F. Turn those into the right font start or
# end codes. The old pod2man didn't get B<someI<thing> else> right; after I<>
# it switched back to normal text rather than bold. We take care of this by
# counting the current nestings of start tags for each font and using the
# three counts as a vector to look up what font sequence to use.
#
# \fP changes to the previous font, but only one previous font is kept. We
# don't know what the outside level font is; normally it's R, but if we're
# inside a heading it could be something else. So arrange things so that the
# outside font is always the "previous" font and end with \fP instead of \fR.
# Idea from Zack Weinberg.
sub mapfonts {
    my ($self, $text) = @_;

    # Nesting depth of each kind of markup and the font currently in effect.
    my %depth = (F => 0, B => 0, I => 0);
    my $current = '\fR';

    $text =~ s{ \\f\( (.) (.) }{
        my ($which, $edge) = ($1, $2);

        # If we've left the outside font, we have to pop back to it with \fP
        # before selecting anything else.
        my $undo = ($current eq '\fR') ? '' : '\fP';
        $depth{$which} += ($edge eq 'S') ? 1 : -1;
        my $key = join ('', map { $depth{$_} ? 1 : 0 } qw(F B I));
        my $font = $self->{FONTS}{$key};

        # Emit nothing when the font doesn't actually change, and never emit
        # an explicit \fR; the \fP alone takes us back to the outside font.
        my $emit = '';
        if ($font ne $current) {
            $emit = $undo;
            $emit .= $font unless $font eq '\fR';
            $current = $font;
        }
        $emit;
    }gxe;
    return $text;
}
851
# Simpler variant of mapfonts for ordinary text blocks, where the default
# font is always R.  Each \f(<font>[SE] marker is replaced directly by the
# FONTS entry for the resulting combination of fixed, bold, and italic, with
# no use of \fP.  This exists because of a bug in Solaris 2.6 nroff (not
# present in GNU groff) where the sequence \fB\fP\f(CW\fP leaves the font set
# to B rather than R, presumably because \f(CW doesn't actually do a font
# change there.  The smart mapfonts is therefore only used for headings.
sub textmapfonts {
    my ($self, $text) = @_;
    my %depth = (F => 0, B => 0, I => 0);

    $text =~ s{ \\f\((.)(.) }{
        my ($font, $action) = ($1, $2);
        $depth{$font} += ($action eq 'S') ? 1 : -1;
        my $key = join ('', map { $depth{$_} && 1 } qw(F B I));
        $$self{FONTS}{$key};
    }gxe;

    return $text;
}
869
870
3c014959 871##############################################################################
872# *roff-specific parsing and magic
873##############################################################################
9741dab0 874
# Wrapper to call in place of parse_text.  Forwards all arguments to
# parse_text, preceded by the options that make it expand interior sequences
# with the sequence method and collapse parse trees with the collapse method.
sub parse {
    my ($self, @args) = @_;
    my %flags = (
        -expand_seq   => 'sequence',
        -expand_ptree => 'collapse',
    );
    return $self->parse_text (\%flags, @args);
}
5cdeb5a2 881
# Flatten a parse tree into a single string.  Takes the parse tree and a
# literal flag.  Every child of the tree is assumed to be either a plain
# scalar or a reference to a scalar; references are dereferenced and used
# as-is.  Plain scalars are handled according to the flag:
#
#   false  run the text through guesswork()
#   1      use the text unchanged
#   > 1    (set to 2 for C<> sequences) escape backslashes and hyphens and
#          put a thin space between double underscores, but do no other
#          guesswork
sub collapse {
    my ($self, $ptree, $literal) = @_;
    my @pieces;
    for my $node ($ptree->children) {
        if (ref $node) {
            push (@pieces, $$node);
        } elsif (!$literal) {
            push (@pieces, $self->guesswork ($node));
        } else {
            my $text = $node;
            if ($literal > 1) {
                $text =~ s/\\/\\e/g;
                $text =~ s/-/\\-/g;
                $text =~ s/__/_\\|_/g;
            }
            push (@pieces, $text);
        }
    }
    return join ('', @pieces);
}
909
# Takes a text block to perform guesswork on; this is guaranteed not to
# contain any interior sequences.  Returns the text block with remapping done.
# The object itself is not consulted.  The steps are applied in a fixed order
# because later patterns rely on the escapes produced by earlier ones:
# backslashes, double underscores, hyphens, em dashes, small caps, func(),
# manual page references, simple Perl variables, double quotes, and C++.
# Font changes are emitted as the internal \f(<font>[SE] markers that
# mapfonts and textmapfonts later translate.
sub guesswork {
    my $self = shift;
    local $_ = shift;

    # rofficate backslashes.
    s/\\/\\e/g;

    # Ensure double underbars have a tiny space between them.
    s/__/_\\|_/g;

    # Leave hyphens only if they're part of regular words and there is only
    # one dash at a time.  Leave a dash after the first character as a regular
    # non-breaking dash, but don't let it mark the rest of the word invalid
    # for hyphenation.  To do this, escape every hyphen first and then put the
    # plain hyphens back inside qualifying words.
    s/-/\\-/g;
    s{
        ( (?:\G|^|\s) [a-zA-Z] ) ( \\- )?
        ( (?: [a-zA-Z]+ \\-)+ )
        ( [a-zA-Z]+ ) (?=\s|\Z)
        \b
    } {
        my ($prefix, $hyphen, $main, $suffix) = ($1, $2, $3, $4);
        $hyphen ||= '';
        $main =~ s/\\-/-/g;
        $prefix . $hyphen . $main . $suffix;
    }egx;

    # Translate -- into a real em dash if it's used like one.
    s{ (\s) \\-\\- (\s) } { $1 . '\*(--' . $2 }egx;
    s{ (\b[a-zA-Z]+) \\-\\- (\s|\Z|[a-zA-Z]+\b) } { $1 . '\*(--' . $2 }egx;

    # Make all caps a little smaller.  Be careful here, since we don't want to
    # make @ARGV into small caps, nor do we want to fix the MIME in
    # MIME-Version, since it looks weird with the full-height V.  The
    # lookahead has to match the em dash string \*(-- literally, which takes
    # an escaped backslash followed by an escaped star; a bare \\* would mean
    # "any number of backslashes" and never match the em dash.
    s{
        ( ^ | [\s\(\"\'\`\[\{<>] )
        ( [A-Z] [A-Z] (?: [/A-Z+:\d_\$&] | \\- )* )
        (?= [\s>\}\]\(\)\'\".?!,;] | \\\*\(-- | $ )
    } { $1 . '\s-1' . $2 . '\s0' }egx;

    # Italicize functions in the form func().
    s{
        ( \b | \\s-1 )
        (
            [A-Za-z_] ([:\w]|\\s-?[01])+ \(\)
        )
    } { $1 . '\f(IS' . $2 . '\f(IE' }egx;

    # func(n) is a reference to a manual page.  Make it \fIfunc\fR\|(n).
    s{
        ( \b | \\s-1 )
        ( [A-Za-z_] (?:[.:\w]|\\-|\\s-?[01])+ )
        (
            \( \d [a-z]* \)
        )
    } { $1 . '\f(IS' . $2 . '\f(IE\|' . $3 }egx;

    # Convert simple Perl variable references to a fixed-width font.
    s{
        ( \s+ )
        ( [\$\@%] [\w:]+ )
        (?! \( )
    } { $1 . '\f(FS' . $2 . '\f(FE'}egx;

    # Fix up double quotes.
    s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx;

    # Make C++ into \*(C+, which is a squinched version.
    s{ \b C\+\+ } {\\*\(C+}gx;

    # All done.
    $_;
}
985
# Wrap C<> text in the internal fixed-width font markers and decide whether
# to also surround it with the \*(C` and \*(C' quote strings.  The decision
# is a pile of heuristics meant to avoid overquoting, originally from Barrie
# Slaymaker, and largely duplicates similar code in Pod::Text.  Text that
# already looks quoted, or that is a lone variable, a function call with at
# most one short argument, or a number, gets only the font change.
sub quote_literal {
    my ($self, $text) = @_;

    # Optional array or hash subscript following a variable name.  Kept as a
    # string because it is interpolated into the pattern below twice.
    my $index = '(?: \[.*\] | \{.*\} )?';

    my $needs_no_quotes = $text =~ m{
        ^\s*
        (?:
           ( [\'\`\"] ) .* \1                             # already quoted
         | \` .* \'                                       # backtick-apostrophe
         | \$+ [\#^]? \S $index                           # special variable
         | [\$\@%&*]+ \#? [:\'\w]+ $index                 # plain var or func
         | [\$\@%&*]* [:\'\w]+ (?: -> )? \(\s*[^\s,]\s*\) # 0/1-arg func call
         | [+-]? [\d.]+ (?: [eE] [+-]? \d+ )?             # a number
         | 0x [a-fA-F\d]+                                 # a hex constant
        )
        \s*\z
    }xo;

    if ($needs_no_quotes) {
        return '\f(FS' . $text . '\f(FE';
    }
    return '\f(FS\*(C`' . $text . "\\*(C'\\f(FE";
}
1017
9741dab0 1018
3c014959 1019##############################################################################
9741dab0 1020# Output formatting
3c014959 1021##############################################################################
9741dab0 1022
# Emit vertical whitespace before the next piece of output.  If more than
# one item has been counted in ITEMS, first emit a bare .PD, then reset the
# item count.  If NEEDSPACE is set, emit .Sp when INDENT is greater than
# zero and .PP otherwise.
sub makespace {
    my ($self) = @_;
    if ($$self{ITEMS} > 1) {
        $self->output (".PD\n");
    }
    $$self{ITEMS} = 0;
    if ($$self{NEEDSPACE}) {
        my $macro = ($$self{INDENT} > 0) ? ".Sp\n" : ".PP\n";
        $self->output ($macro);
    }
}
1031
# Emit .IX index lines.  The pending entries saved in INDEX (each of which
# may hold several entries separated by slashes) are written as one Xref
# line and INDEX is then emptied.  If a section and index text are given as
# arguments, an additional line of that type is written, after turning \-
# back into a hyphen and stripping other backslash escapes from the text.
# Double quotes in any entry are doubled.  Does nothing at all when there is
# neither a section nor a pending entry.
sub outindex {
    my ($self, $section, $index) = @_;
    my @xrefs = map { split m%\s*/\s*% } @{ $$self{INDEX} };
    return unless ($section || @xrefs);
    $$self{INDEX} = [];

    my @lines;
    push (@lines, [ 'Xref', join (' ', @xrefs) ]) if @xrefs;
    if ($section) {
        $index =~ s/\\-/-/g;
        $index =~ s/\\(?:s-?\d|.\(..|.)//g;
        push (@lines, [ $section, $index ]);
    }

    foreach my $line (@lines) {
        my ($type, $entry) = @$line;
        $entry =~ s/\"/\"\"/g;
        $self->output (".IX $type \"$entry\"\n");
    }
}
1055
# Write the given text to the file handle returned by the object's
# output_handle method.
sub output {
    my ($self, $text) = @_;
    my $handle = $self->output_handle;
    print {$handle} $text;
}
1058
# Given a command and a single argument that may or may not contain double
# quotes, handle double-quote formatting for it.  An optional third argument
# is appended after the quoted argument when it is true.  Returns the
# complete line or lines, newline-terminated.
#
# If the argument has neither double quotes nor fixed-width font escapes,
# just return the command followed by the argument in double quotes.
# Otherwise build separate nroff and troff versions.  For nroff, embedded
# double quotes are doubled; for troff, paired double quotes become `` and
# ''.  If the two versions differ, they are returned wrapped in an .ie n /
# .el conditional.
sub switchquotes {
    my ($self, $command, $text, $extra) = @_;
    my $suffix = $extra ? " $extra" : '';

    # Reduce the left and right quote strings to plain double quotes so that
    # all double quotes can be handled uniformly below.
    $text =~ s/\\\*\([LR]\"/\"/g;

    # We also have to deal with \*C` and \*C', which are used to add the
    # quotes around C<> text, since they may expand to " and if they do this
    # confuses the .SH macros and the like no end.  Expand them ourselves.
    # Also separate troff from nroff if there are any fixed-width fonts in use
    # to work around problems with Solaris nroff.
    my $c_is_quote = ($$self{LQUOTE} =~ /\"/) || ($$self{RQUOTE} =~ /\"/);

    # Match any of the fixed-width font escapes literally, whatever
    # characters the configured font names contain.
    my $fixedpat = join ('|', map { quotemeta $_ }
                         @{ $$self{FONTS} }{'100', '101', '110', '111'});

    unless ($text =~ /\"/ || $text =~ /$fixedpat/) {
        return "$command \"$text\"$suffix\n";
    }

    $text =~ s/\"/\"\"/g;
    my $nroff = $text;
    my $troff = $text;
    $troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g;
    if ($c_is_quote && $text =~ /\\\*\(C[\'\`]/) {
        $nroff =~ s/\\\*\(C\`/$$self{LQUOTE}/g;
        $nroff =~ s/\\\*\(C\'/$$self{RQUOTE}/g;
        $troff =~ s/\\\*\(C[\'\`]//g;
    }
    $nroff = qq("$nroff") . $suffix;
    $troff = qq("$troff") . $suffix;

    # Work around the Solaris nroff bug where \f(CW\fP leaves the font set
    # to Roman rather than the actual previous font when used in headings.
    # troff output may still be broken, but at least we can fix nroff by
    # just switching the font changes to the non-fixed versions.  The matches
    # are non-greedy so that each fixed-width span is paired with the first
    # font change that follows it; a greedy match would treat several spans
    # in one argument as a single span and leave the font changes between
    # them behind.
    # NOTE(review): a bold or italic fixed-width span nested inside a
    # fixed-width span is still only handled approximately.
    $nroff =~ s/\Q$$self{FONTS}{100}\E(.*?)\\f[PR]/$1/g;
    $nroff =~ s/\Q$$self{FONTS}{101}\E(.*?)\\f([PR])/\\fI$1\\f$2/g;
    $nroff =~ s/\Q$$self{FONTS}{110}\E(.*?)\\f([PR])/\\fB$1\\f$2/g;
    $nroff =~ s/\Q$$self{FONTS}{111}\E(.*?)\\f([PR])/\\f\(BI$1\\f$2/g;

    # Now finally output the command.  Only bother with .ie if the nroff
    # and troff output isn't the same.
    if ($nroff ne $troff) {
        return ".ie n $command $nroff\n.el $command $troff\n";
    } else {
        return "$command $nroff\n";
    }
}
1116
9741dab0 1117__END__
1118
bf202ccd 1119.\" These are some extra bits of roff that I don't want to lose track of but
1120.\" that have been removed from the preamble to make it a bit shorter since
1121.\" they're not currently being used. They're accents and special characters
1122.\" we don't currently have escapes for.
9741dab0 1123.if n \{\
1124. ds ? ?
1125. ds ! !
1126. ds q
1127.\}
1128.if t \{\
1129. ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
1130. ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
1131. ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
1132.\}
1133.ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
1134.ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
1135.ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
1136.ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
1137.ds oe o\h'-(\w'o'u*4/10)'e
1138.ds Oe O\h'-(\w'O'u*4/10)'E
1139.if \n(.H>23 .if \n(.V>19 \
1140\{\
1141. ds v \h'-1'\o'\(aa\(ga'
1142. ds _ \h'-1'^
1143. ds . \h'-1'.
1144. ds 3 3
1145. ds oe oe
1146. ds Oe OE
1147.\}
1148
3c014959 1149##############################################################################
9741dab0 1150# Documentation
3c014959 1151##############################################################################
9741dab0 1152
1153=head1 NAME
1154
1155Pod::Man - Convert POD data to formatted *roff input
1156
1157=head1 SYNOPSIS
1158
1159 use Pod::Man;
1160 my $parser = Pod::Man->new (release => $VERSION, section => 8);
1161
1162 # Read POD from STDIN and write to STDOUT.
1163 $parser->parse_from_filehandle;
1164
1165 # Read POD from file.pod and write to file.1.
1166 $parser->parse_from_file ('file.pod', 'file.1');
1167
1168=head1 DESCRIPTION
1169
1170Pod::Man is a module to convert documentation in the POD format (the
1171preferred language for documenting Perl) into *roff input using the man
1172macro set. The resulting *roff code is suitable for display on a terminal
bf202ccd 1173using L<nroff(1)>, normally via L<man(1)>, or printing using L<troff(1)>.
1174It is conventionally invoked using the driver script B<pod2man>, but it can
1175also be used directly.
9741dab0 1176
1177As a derived class from Pod::Parser, Pod::Man supports the same methods and
1178interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
b4558dc4 1179new parser with C<< Pod::Man->new() >> and then calls either
9741dab0 1180parse_from_filehandle() or parse_from_file().
1181
1182new() can take options, in the form of key/value pairs that control the
1183behavior of the parser. See below for details.
1184
If no options are given, Pod::Man uses the name of the input file with any
trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title.  It
defaults to section 1 unless the file ended in C<.pm>, in which case it
defaults to section 3.  It also defaults to a centered title of "User
Contributed Perl Documentation", to a centered footer of the Perl version it
is run with, and to a left-hand footer of the modification date of its input
(or the current date if given STDIN for input).
1192
1193Pod::Man assumes that your *roff formatters have a fixed-width font named
1194CW. If yours is called something else (like CR), use the C<fixed> option to
1195specify it. This generally only matters for troff output for printing.
1196Similarly, you can set the fonts used for bold, italic, and bold italic
1197fixed-width output.
1198
1199Besides the obvious pod conversions, Pod::Man also takes care of formatting
bf202ccd 1200func(), func(3), and simple variable references like $foo or @bar so you
9741dab0 1201don't have to use code escapes for them; complex expressions like
1202C<$fred{'stuff'}> will still need to be escaped, though. It also translates
1203dashes that aren't used as hyphens into en dashes, makes long dashes--like
b4558dc4 1204this--into proper em dashes, fixes "paired quotes," makes C++ look right,
1205puts a little space between double underbars, makes ALLCAPS a teeny bit
1206smaller in B<troff>, and escapes stuff that *roff treats as special so that
1207you don't have to.
9741dab0 1208
1209The recognized options to new() are as follows. All options take a single
1210argument.
1211
1212=over 4
1213
1214=item center
1215
1216Sets the centered page header to use instead of "User Contributed Perl
1217Documentation".
1218
1219=item date
1220
1221Sets the left-hand footer. By default, the modification date of the input
1222file will be used, or the current date if stat() can't find that file (the
1223case if the input is from STDIN), and the date will be formatted as
1224YYYY-MM-DD.
1225
1226=item fixed
1227
The fixed-width font to use for verbatim text and code.  Defaults to CW.
Some systems may want CR instead.  Only matters for B<troff> output.
9741dab0 1230
1231=item fixedbold
1232
1233Bold version of the fixed-width font. Defaults to CB. Only matters for
bf202ccd 1234B<troff> output.
9741dab0 1235
1236=item fixeditalic
1237
1238Italic version of the fixed-width font (actually, something of a misnomer,
1239since most fixed-width fonts only have an oblique version, not an italic
bf202ccd 1240version). Defaults to CI. Only matters for B<troff> output.
9741dab0 1241
1242=item fixedbolditalic
1243
1244Bold italic (probably actually oblique) version of the fixed-width font.
1245Pod::Man doesn't assume you have this, and defaults to CB. Some systems
bf202ccd 1246(such as Solaris) have this font available as CX. Only matters for B<troff>
9741dab0 1247output.
1248
bf202ccd 1249=item name
1250
1251Set the name of the manual page. Without this option, the manual name is
1252set to the uppercased base name of the file being converted unless the
1253manual section is 3, in which case the path is parsed to see if it is a Perl
1254module path. If it is, a path like C<.../lib/Pod/Man.pm> is converted into
1255a name like C<Pod::Man>. This option, if given, overrides any automatic
1256determination of the name.
1257
ab1f1d91 1258=item quotes
1259
Sets the quote marks used to surround CE<lt>> text.  If the value is a
single character, it is used as both the left and right quote; if it is two
characters, the first character is used as the left quote and the second as
the right quote; and if it is four characters, the first two are used as
the left quote and the second two as the right quote.
1265
1266This may also be set to the special value C<none>, in which case no quote
1267marks are added around CE<lt>> text (but the font is still changed for troff
1268output).
1269
9741dab0 1270=item release
1271
1272Set the centered footer. By default, this is the version of Perl you run
bf202ccd 1273Pod::Man under. Note that some system an macro sets assume that the
9741dab0 1274centered footer will be a modification date and will prepend something like
1275"Last modified: "; if this is the case, you may want to set C<release> to
1276the last modified date and C<date> to the version number.
1277
1278=item section
1279
1280Set the section for the C<.TH> macro. The standard section numbering
1281convention is to use 1 for user commands, 2 for system calls, 3 for
1282functions, 4 for devices, 5 for file formats, 6 for games, 7 for
1283miscellaneous information, and 8 for administrator commands. There is a lot
1284of variation here, however; some systems (like Solaris) use 4 for file
1285formats, 5 for miscellaneous information, and 7 for devices. Still others
1286use 1m instead of 8, or some mix of both. About the only section numbers
1287that are reliably consistent are 1, 2, and 3.
1288
1289By default, section 1 will be used unless the file ends in .pm in which case
1290section 3 will be selected.
1291
1292=back
1293
1294The standard Pod::Parser method parse_from_filehandle() takes up to two
1295arguments, the first being the file handle to read POD from and the second
1296being the file handle to write the formatted output to. The first defaults
1297to STDIN if not given, and the second defaults to STDOUT. The method
1298parse_from_file() is almost identical, except that its two arguments are the
1299input and output disk files instead. See L<Pod::Parser> for the specific
1300details.
1301
1302=head1 DIAGNOSTICS
1303
1304=over 4
1305
ab1f1d91 1306=item roff font should be 1 or 2 chars, not "%s"
9741dab0 1307
1308(F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that
1309wasn't either one or two characters. Pod::Man doesn't support *roff fonts
1310longer than two characters, although some *roff extensions do (the canonical
bf202ccd 1311versions of B<nroff> and B<troff> don't either).
9741dab0 1312
1313=item Invalid link %s
1314
1315(W) The POD source contained a C<LE<lt>E<gt>> sequence that Pod::Man was
1316unable to parse. You should never see this error message; it probably
1317indicates a bug in Pod::Man.
1318
ab1f1d91 1319=item Invalid quote specification "%s"
1320
1321(F) The quote specification given (the quotes option to the constructor) was
1322invalid. A quote specification must be one, two, or four characters long.
1323
1324=item %s:%d: Unknown command paragraph "%s".
1325
1326(W) The POD source contained a non-standard command paragraph (something of
1327the form C<=command args>) that Pod::Man didn't know about. It was ignored.
1328
aa212ad6 1329=item %s:%d: Unknown escape EE<lt>%sE<gt>
9741dab0 1330
1331(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Man didn't
1332know about. C<EE<lt>%sE<gt>> was printed verbatim in the output.
1333
aa212ad6 1334=item %s:%d: Unknown sequence %s
9741dab0 1335
1336(W) The POD source contained a non-standard interior sequence (something of
1337the form C<XE<lt>E<gt>>) that Pod::Man didn't know about. It was ignored.
1338
aa212ad6 1339=item %s:%d: Unmatched =back
9741dab0 1340
1341(W) Pod::Man encountered a C<=back> command that didn't correspond to an
1342C<=over> command.
1343
1344=back
1345
1346=head1 BUGS
1347
b4558dc4 1348Eight-bit input data isn't handled at all well at present. The correct
1349approach would be to map EE<lt>E<gt> escapes to the appropriate UTF-8
1350characters and then do a translation pass on the output according to the
1351user-specified output character set. Unfortunately, we can't send eight-bit
1352data directly to the output unless the user says this is okay, since some
1353vendor *roff implementations can't handle eight-bit data. If the *roff
1354implementation can, however, that's far superior to the current hacked
1355characters that only work under troff.
1356
1357There is currently no way to turn off the guesswork that tries to format
1358unmarked text appropriately, and sometimes it isn't wanted (particularly
1359when using POD to document something other than Perl).
9741dab0 1360
The NAME section should be recognized specially and index entries emitted
for everything in that section.  This would have to be deferred until the
next section, since extraneous things in NAME tend to confuse various man
page processors.
1365
9741dab0 1366Pod::Man doesn't handle font names longer than two characters. Neither do
bf202ccd 1367most B<troff> implementations, but GNU troff does as an extension. It would
9741dab0 1368be nice to support as an option for those who want to use it.
1369
1370The preamble added to each output file is rather verbose, and most of it is
1371only necessary in the presence of EE<lt>E<gt> escapes for non-ASCII
1372characters. It would ideally be nice if all of those definitions were only
1373output if needed, perhaps on the fly as the characters are used.
1374
9741dab0 1375Pod::Man is excessively slow.
1376
b4558dc4 1377=head1 CAVEATS
1378
1379The handling of hyphens and em dashes is somewhat fragile, and one may get
1380the wrong one under some circumstances. This should only matter for
1381B<troff> output.
1382
1383When and whether to use small caps is somewhat tricky, and Pod::Man doesn't
1384necessarily get it right.
1385
9741dab0 1386=head1 SEE ALSO
1387
bf202ccd 1388L<Pod::Parser>, L<perlpod(1)>, L<pod2man(1)>, L<nroff(1)>, L<troff(1)>,
1389L<man(1)>, L<man(7)>
9741dab0 1390
1391Ossanna, Joseph F., and Brian W. Kernighan. "Troff User's Manual,"
1392Computing Science Technical Report No. 54, AT&T Bell Laboratories. This is
bf202ccd 1393the best documentation of standard B<nroff> and B<troff>. At the time of
1394this writing, it's available at
1395L<http://www.cs.bell-labs.com/cm/cs/cstr.html>.
9741dab0 1396
bf202ccd 1397The man page documenting the man macro set may be L<man(5)> instead of
1398L<man(7)> on your system. Also, please see L<pod2man(1)> for extensive
1399documentation on writing manual pages if you've not done it before and
1400aren't familiar with the conventions.
9741dab0 1401
1402=head1 AUTHOR
1403
bf202ccd 1404Russ Allbery <rra@stanford.edu>, based I<very> heavily on the original
1405B<pod2man> by Tom Christiansen <tchrist@mox.perl.com>.
9741dab0 1406
3c014959 1407=head1 COPYRIGHT AND LICENSE
1408
1409Copyright 1999, 2000, 2001 by Russ Allbery <rra@stanford.edu>.
1410
1411This program is free software; you may redistribute it and/or modify it
1412under the same terms as Perl itself.
1413
9741dab0 1414=cut