pod/pod2man.PL

   1 #!/usr/local/bin/perl
   2
   3 use Config;
   4 use File::Basename qw(&basename &dirname);
   5 use Cwd;
   6
   7 # List explicitly here the variables you want Configure to
   8 # generate.  Metaconfig only looks for shell variables, so you
   9 # have to mention them as if they were shell variables, not
  10 # %Config entries.  Thus you write
  11 #  $startperl
  12 #  $man3ext
  13 # to ensure Configure will look for $Config{startperl}.
  14
  15 # This forces PL files to create target in same directory as PL file.
  16 # This is so that make depend always knows where to find PL derivatives.
  17 $origdir = cwd;
  18 chdir dirname($0);
  19 $file = basename($0, '.PL');
  20 $file .= '.com' if $^O eq 'VMS';
  21
  22 open OUT,">$file" or die "Can't create $file: $!";
  23
  24 print "Extracting $file (with variable substitutions)\n";
  25
  26 # In this section, perl variables will be expanded during extraction.
  27 # You can use $Config{...} to use Configure variables.
  28
  29 print OUT <<"!GROK!THIS!";
  30 $Config{startperl}
  31     eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}'
  32         if \$running_under_some_shell;
  33
  34 \$DEF_PM_SECTION = '$Config{man3ext}' || '3';
  35 !GROK!THIS!
  36
  37 # In the following, perl variables are not expanded during extraction.
  38
  39 print OUT <<'!NO!SUBS!';
  40
  41 =head1 NAME
  42
  43 pod2man - translate embedded Perl pod directives into man pages
  44
  45 =head1 SYNOPSIS
  46
  47 B<pod2man>
  48 [ B<--section=>I<manext> ]
  49 [ B<--release=>I<relpatch> ]
  50 [ B<--center=>I<string> ]
  51 [ B<--date=>I<string> ]
  52 [ B<--fixed=>I<font> ]
  53 [ B<--official> ]
  54 [ B<--lax> ]
  55 I<inputfile>
  56
  57 =head1 DESCRIPTION
  58
  59 B<pod2man> converts its input file containing embedded pod directives (see
  60 L<perlpod>) into nroff source suitable for viewing with nroff(1) or
  61 troff(1) using the man(7) macro set.
  62
  63 Besides the obvious pod conversions, B<pod2man> also takes care of
  64 func(), func(n), and simple variable references like $foo or @bar so
  65 you don't have to use code escapes for them; complex expressions like
  66 C<$fred{'stuff'}> will still need to be escaped, though.  Other nagging
  67 little roffish things that it catches include translating the minus in
  68 something like foo-bar, making a long dash--like this--into a real em
  69 dash, fixing up "paired quotes", putting a little space after the
  70 parens in something like func(), making C++ and PI look right, making
  71 double underbars have a little tiny space between them, making ALLCAPS
  72 a teeny bit smaller in troff(1), and escaping backslashes so you don't
  73 have to.
  74
  75 =head1 OPTIONS
  76
  77 =over 8
  78
  79 =item center
  80
  81 Set the centered header to a specific string.  The default is
  82 "User Contributed Perl Documentation", unless the C<--official> flag is
  83 given, in which case the default is "Perl Programmers Reference Guide".
  84
  85 =item date
  86
  87 Set the left-hand footer string to this value.  By default,
  88 the modification date of the input file will be used.
  89
  90 =item fixed
  91
  92 The fixed font to use for code refs.  Defaults to CW.
  93
  94 =item official
  95
  96 Set the default header to indicate that this page is of
  97 the standard release in case C<--center> is not given.
  98
  99 =item release
 100
 101 Set the centered footer.  By default, this is the current
 102 perl release.
 103
 104 =item section
 105
 106 Set the section for the C<.TH> macro.  The standard conventions on
 107 sections are to use 1 for user commands,  2 for system calls, 3 for
 108 functions, 4 for devices, 5 for file formats, 6 for games, 7 for
 109 miscellaneous information, and 8 for administrator commands.  This works
 110 best if you put your Perl man pages in a separate tree, like
 111 F</usr/local/perl/man/>.  By default, section 1 will be used
 112 unless the file ends in F<.pm> in which case section 3 will be selected.
 113
 114 =item lax
 115
 116 Don't complain when required sections aren't present.
 117
 118 =back
 119
 120 =head1 Anatomy of a Proper Man Page
 121
 122 For those not sure of the proper layout of a man page, here's
 123 an example of the skeleton of a proper man page.  Head of the
 124 major headers should be setout as a C<=head1> directive, and
 125 are historically written in the rather startling ALL UPPER CASE
 126 format, although this is not mandatory.
 127 Minor headers may be included using C<=head2>, and are
 128 typically in mixed case.
 129
 130 =over 10
 131
 132 =item NAME
 133
 134 Mandatory section; should be a comma-separated list of programs or
 135 functions documented by this podpage, such as:
 136
 137     foo, bar - programs to do something
 138
 139 =item SYNOPSIS
 140
 141 A short usage summary for programs and functions, which
 142 may someday be deemed mandatory.
 143
 144 =item DESCRIPTION
 145
 146 Long drawn out discussion of the program.  It's a good idea to break this
 147 up into subsections using the C<=head2> directives, like
 148
 149     =head2 A Sample Subection
 150
 151     =head2 Yet Another Sample Subection
 152
 153 =item OPTIONS
 154
 155 Some people make this separate from the description.
 156
 157 =item RETURN VALUE
 158
 159 What the program or function returns if successful.
 160
 161 =item ERRORS
 162
 163 Exceptions, return codes, exit stati, and errno settings.
 164
 165 =item EXAMPLES
 166
 167 Give some example uses of the program.
 168
 169 =item ENVIRONMENT
 170
 171 Envariables this program might care about.
 172
 173 =item FILES
 174
 175 All files used by the program.  You should probably use the FE<lt>E<gt>
 176 for these.
 177
 178 =item SEE ALSO
 179
 180 Other man pages to check out, like man(1), man(7), makewhatis(8), or catman(8).
 181
 182 =item NOTES
 183
 184 Miscellaneous commentary.
 185
 186 =item CAVEATS
 187
 188 Things to take special care with; sometimes called WARNINGS.
 189
 190 =item DIAGNOSTICS
 191
 192 All possible messages the program can print out--and
 193 what they mean.
 194
 195 =item BUGS
 196
 197 Things that are broken or just don't work quite right.
 198
 199 =item RESTRICTIONS
 200
 201 Bugs you don't plan to fix :-)
 202
 203 =item AUTHOR
 204
 205 Who wrote it (or AUTHORS if multiple).
 206
 207 =item HISTORY
 208
 209 Programs derived from other sources sometimes have this, or
 210 you might keep a modification log here.
 211
 212 =back
 213
 214 =head1 EXAMPLES
 215
 216     pod2man program > program.1
 217     pod2man some_module.pm > /usr/perl/man/man3/some_module.3
 218     pod2man --section=7 note.pod > note.7
 219
 220 =head1 DIAGNOSTICS
 221
 222 The following diagnostics are generated by B<pod2man>.  Items
 223 marked "(W)" are non-fatal, whereas the "(F)" errors will cause
 224 B<pod2man> to immediately exit with a non-zero status.
 225
 226 =over 4
 227
 228 =item bad option in paragraph %d of %s: ``%s'' should be [%s]<%s>
 229
 230 (W) If you start include an option, you should set it off
 231 as bold, italic, or code.
 232
 233 =item can't open %s: %s
 234
 235 (F) The input file wasn't available for the given reason.
 236
 237 =item Improper man page - no dash in NAME header in paragraph %d of %s
 238
 239 (W) The NAME header did not have an isolated dash in it.  This is
 240 considered important.
 241
 242 =item Invalid man page - no NAME line in %s
 243
 244 (F) You did not include a NAME header, which is essential.
 245
 246 =item roff font should be 1 or 2 chars, not `%s'  (F)
 247
 248 (F) The font specified with the C<--fixed> option was not
 249 a one- or two-digit roff font.
 250
 251 =item %s is missing required section: %s
 252
 253 (W) Required sections include NAME, DESCRIPTION, and if you're
 254 using a section starting with a 3, also a SYNOPSIS.  Actually,
 255 not having a NAME is a fatal.
 256
 257 =item Unknown escape: %s in %s
 258
 259 (W) An unknown HTML entity (probably for an 8-bit character) was given via
 260 a C<EE<lt>E<gt>> directive.  Besides amp, lt, gt, and quot, recognized
 261 entities are Aacute, aacute, Acirc, acirc, AElig, aelig, Agrave, agrave,
 262 Aring, aring, Atilde, atilde, Auml, auml, Ccedil, ccedil, Eacute, eacute,
 263 Ecirc, ecirc, Egrave, egrave, ETH, eth, Euml, euml, Iacute, iacute, Icirc,
 264 icirc, Igrave, igrave, Iuml, iuml, Ntilde, ntilde, Oacute, oacute, Ocirc,
 265 ocirc, Ograve, ograve, Oslash, oslash, Otilde, otilde, Ouml, ouml, szlig,
 266 THORN, thorn, Uacute, uacute, Ucirc, ucirc, Ugrave, ugrave, Uuml, uuml,
 267 Yacute, yacute, and yuml.
 268
 269 =item Unmatched =back
 270
 271 (W) You have a C<=back> without a corresponding C<=over>.
 272
 273 =item Unrecognized pod directive: %s
 274
 275 (W) You specified a pod directive that isn't in the known list of
 276 C<=head1>, C<=head2>, C<=item>, C<=over>, C<=back>, or C<=cut>.
 277
 278
 279 =back
 280
 281 =head1 NOTES
 282
 283 If you would like to print out a lot of man page continuously, you
 284 probably want to set the C and D registers to set contiguous page
 285 numbering and even/odd paging, at least on some versions of man(7).
 286 Settting the F register will get you some additional experimental
 287 indexing:
 288
 289     troff -man -rC1 -rD1 -rF1 perl.1 perldata.1 perlsyn.1 ...
 290
 291 The indexing merely outputs messages via C<.tm> for each
 292 major page, section, subsection, item, and any C<XE<lt>E<gt>>
 293 directives.
 294
 295
 296 =head1 RESTRICTIONS
 297
 298 None at this time.
 299
 300 =head1 BUGS
 301
 302 The =over and =back directives don't really work right.  They
 303 take absolute positions instead of offsets, don't nest well, and
 304 making people count is suboptimal in any event.
 305
 306 =head1 AUTHORS
 307
 308 Original prototype by Larry Wall, but so massively hacked over by
 309 Tom Christiansen such that Larry probably doesn't recognize it anymore.
 310
 311 =cut
 312
 313 $/ = "";
 314 $cutting = 1;
 315 @Indices = ();
 316
 317 # We try first to get the version number from a local binary, in case we're
 318 # running an installed version of Perl to produce documentation from an
 319 # uninstalled newer version's pod files.
 320 if ($^O ne 'plan9' and $^O ne 'dos' and $^O ne 'os2' and $^O ne 'MSWin32') {
 321   ($version,$patch) =
 322     `\PATH=.:..:\$PATH; perl -v` =~ /version (\d\.\d{3})(?:_(\d{2}))?/;
 323 }
 324 # No luck; we'll just go with the running Perl's version
 325 ($version,$patch) = $] =~ /^(.{5})(\d{2})?/ unless $version;
 326 $DEF_RELEASE  = "perl $version";
 327 $DEF_RELEASE .= ", patch $patch" if $patch;
 328
 329
 330 sub makedate {
 331     my $secs = shift;
 332     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($secs);
 333     my $mname = (qw{Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec})[$mon];
 334     return "$mday/$mname/$year";
 335 }
 336
 337 use Getopt::Long;
 338
 339 $DEF_SECTION = 1;
 340 $DEF_CENTER = "User Contributed Perl Documentation";
 341 $STD_CENTER = "Perl Programmers Reference Guide";
 342 $DEF_FIXED = 'CW';
 343 $DEF_LAX = 0;
 344
 345 sub usage {
 346     warn "$0: @_\n" if @_;
 347     die <<EOF;
 348 usage: $0 [options] podpage
 349 Options are:
 350         --section=manext      (default "$DEF_SECTION")
 351         --release=relpatch    (default "$DEF_RELEASE")
 352         --center=string       (default "$DEF_CENTER")
 353         --date=string         (default "$DEF_DATE")
 354         --fixed=font          (default "$DEF_FIXED")
 355         --official            (default NOT)
 356         --lax                 (default NOT)
 357 EOF
 358 }
 359
 360 $uok = GetOptions( qw(
 361         section=s
 362         release=s
 363         center=s
 364         date=s
 365         fixed=s
 366         official
 367         lax
 368         help));
 369
 370 $DEF_DATE = makedate((stat($ARGV[0]))[9] || time());
 371
 372 usage("Usage error!") unless $uok;
 373 usage() if $opt_help;
 374 usage("Need one and only one podpage argument") unless @ARGV == 1;
 375
 376 $section = $opt_section || ($ARGV[0] =~ /\.pm$/
 377                                 ? $DEF_PM_SECTION : $DEF_SECTION);
 378 $RP = $opt_release || $DEF_RELEASE;
 379 $center = $opt_center || ($opt_official ? $STD_CENTER : $DEF_CENTER);
 380 $lax = $opt_lax || $DEF_LAX;
 381
 382 $CFont = $opt_fixed || $DEF_FIXED;
 383
 384 if (length($CFont) == 2) {
 385     $CFont_embed = "\\f($CFont";
 386 }
 387 elsif (length($CFont) == 1) {
 388     $CFont_embed = "\\f$CFont";
 389 }
 390 else {
 391     die "roff font should be 1 or 2 chars, not `$CFont_embed'";
 392 }
 393
 394 $date = $opt_date || $DEF_DATE;
 395
 396 for (qw{NAME DESCRIPTION}) {
 397 # for (qw{NAME DESCRIPTION AUTHOR}) {
 398     $wanna_see{$_}++;
 399 }
 400 $wanna_see{SYNOPSIS}++ if $section =~ /^3/;
 401
 402
 403 $name = @ARGV ? $ARGV[0] : "<STDIN>";
 404 $Filename = $name;
 405 if ($section =~ /^1/) {
 406     require File::Basename;
 407     $name = uc File::Basename::basename($name);
 408 }
 409 $name =~ s/\.(pod|p[lm])$//i;
 410
 411 # Lose everything up to the first of
 412 #     */lib/*perl*      standard or site_perl module
 413 #     */*perl*/lib      from -D prefix=/opt/perl
 414 #     */*perl*/         random module hierarchy
 415 # which works.
 416 $name =~ s-//+-/-g;
 417 if ($name =~ s-^.*?/lib/[^/]*perl[^/]*/--i
 418         or $name =~ s-^.*?/[^/]*perl[^/]*/lib/--i
 419         or $name =~ s-^.*?/[^/]*perl[^/]*/--i) {
 420     # Lose ^site(_perl)?/.
 421     $name =~ s-^site(_perl)?/--;
 422     # Lose ^arch/.      (XXX should we use Config? Just for archname?)
 423     $name =~ s~^(.*-$^O|$^O-.*)/~~o;
 424     # Lose ^version/.
 425     $name =~ s-^\d+\.\d+/--;
 426 }
 427
 428 # Translate Getopt/Long to Getopt::Long, etc.
 429 $name =~ s(/)(::)g;
 430
 431 if ($name ne 'something') {
 432     FCHECK: {
 433         open(F, "< $ARGV[0]") || die "can't open $ARGV[0]: $!";
 434         while (<F>) {
 435             next unless /^=\b/;
 436             if (/^=head1\s+NAME\s*$/) {  # an /m would forgive mistakes
 437                 $_ = <F>;
 438                 unless (/\s*-+\s+/) {
 439                     $oops++;
 440                     warn "$0: Improper man page - no dash in NAME header in paragraph $. of $ARGV[0]\n"
 441                 } else {
 442                     my @n = split /\s+-+\s+/;
 443                     if (@n != 2) {
 444                         $oops++;
 445                         warn "$0: Improper man page - malformed NAME header in paragraph $. of $ARGV[0]\n"
 446                     }
 447                     else {
 448                         %namedesc = @n;
 449                     }
 450                 }
 451                 last FCHECK;
 452             }
 453             next if /^=cut\b/;  # DB_File and Net::Ping have =cut before NAME
 454             next if /^=pod\b/;  # It is OK to have =pod before NAME
 455             die "$0: Invalid man page - 1st pod line is not NAME in $ARGV[0]\n" unless $lax;
 456         }
 457         die "$0: Invalid man page - no documentation in $ARGV[0]\n" unless $lax;
 458     }
 459     close F;
 460 }
 461
 462 print <<"END";
 463 .rn '' }`
 464 ''' \$RCSfile\$\$Revision\$\$Date\$
 465 '''
 466 ''' \$Log\$
 467 '''
 468 .de Sh
 469 .br
 470 .if t .Sp
 471 .ne 5
 472 .PP
 473 \\fB\\\\\$1\\fR
 474 .PP
 475 ..
 476 .de Sp
 477 .if t .sp .5v
 478 .if n .sp
 479 ..
 480 .de Ip
 481 .br
 482 .ie \\\\n(.\$>=3 .ne \\\\\$3
 483 .el .ne 3
 484 .IP "\\\\\$1" \\\\\$2
 485 ..
 486 .de Vb
 487 .ft $CFont
 488 .nf
 489 .ne \\\\\$1
 490 ..
 491 .de Ve
 492 .ft R
 493
 494 .fi
 495 ..
 496 '''
 497 '''
 498 '''     Set up \\*(-- to give an unbreakable dash;
 499 '''     string Tr holds user defined translation string.
 500 '''     Bell System Logo is used as a dummy character.
 501 '''
 502 .tr \\(*W-|\\(bv\\*(Tr
 503 .ie n \\{\\
 504 .ds -- \\(*W-
 505 .ds PI pi
 506 .if (\\n(.H=4u)&(1m=24u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-12u'-\\" diablo 10 pitch
 507 .if (\\n(.H=4u)&(1m=20u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-8u'-\\" diablo 12 pitch
 508 .ds L" ""
 509 .ds R" ""
 510 '''   \\*(M", \\*(S", \\*(N" and \\*(T" are the equivalent of
 511 '''   \\*(L" and \\*(R", except that they are used on ".xx" lines,
 512 '''   such as .IP and .SH, which do another additional levels of
 513 '''   double-quote interpretation
 514 .ds M" """
 515 .ds S" """
 516 .ds N" """""
 517 .ds T" """""
 518 .ds L' '
 519 .ds R' '
 520 .ds M' '
 521 .ds S' '
 522 .ds N' '
 523 .ds T' '
 524 'br\\}
 525 .el\\{\\
 526 .ds -- \\(em\\|
 527 .tr \\*(Tr
 528 .ds L" ``
 529 .ds R" ''
 530 .ds M" ``
 531 .ds S" ''
 532 .ds N" ``
 533 .ds T" ''
 534 .ds L' `
 535 .ds R' '
 536 .ds M' `
 537 .ds S' '
 538 .ds N' `
 539 .ds T' '
 540 .ds PI \\(*p
 541 'br\\}
 542 END
 543
 544 print <<'END';
 545 .\"     If the F register is turned on, we'll generate
 546 .\"     index entries out stderr for the following things:
 547 .\"             TH      Title
 548 .\"             SH      Header
 549 .\"             Sh      Subsection
 550 .\"             Ip      Item
 551 .\"             X<>     Xref  (embedded
 552 .\"     Of course, you have to process the output yourself
 553 .\"     in some meaninful fashion.
 554 .if \nF \{
 555 .de IX
 556 .tm Index:\\$1\t\\n%\t"\\$2"
 557 ..
 558 .nr % 0
 559 .rr F
 560 .\}
 561 END
 562
 563 print <<"END";
 564 .TH $name $section "$RP" "$date" "$center"
 565 .UC
 566 END
 567
 568 push(@Indices, qq{.IX Title "$name $section"});
 569
 570 while (($name, $desc) = each %namedesc) {
 571     for ($name, $desc) { s/^\s+//; s/\s+$//; }
 572     push(@Indices, qq(.IX Name "$name - $desc"\n));
 573 }
 574
 575 print <<'END';
 576 .if n .hy 0
 577 .if n .na
 578 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
 579 .de CQ          \" put $1 in typewriter font
 580 END
 581 print ".ft $CFont\n";
 582 print <<'END';
 583 'if n "\c
 584 'if t \\&\\$1\c
 585 'if n \\&\\$1\c
 586 'if n \&"
 587 \\&\\$2 \\$3 \\$4 \\$5 \\$6 \\$7
 588 '.ft R
 589 ..
 590 .\" @(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2
 591 .       \" AM - accent mark definitions
 592 .bd B 3
 593 .       \" fudge factors for nroff and troff
 594 .if n \{\
 595 .       ds #H 0
 596 .       ds #V .8m
 597 .       ds #F .3m
 598 .       ds #[ \f1
 599 .       ds #] \fP
 600 .\}
 601 .if t \{\
 602 .       ds #H ((1u-(\\\\n(.fu%2u))*.13m)
 603 .       ds #V .6m
 604 .       ds #F 0
 605 .       ds #[ \&
 606 .       ds #] \&
 607 .\}
 608 .       \" simple accents for nroff and troff
 609 .if n \{\
 610 .       ds ' \&
 611 .       ds ` \&
 612 .       ds ^ \&
 613 .       ds , \&
 614 .       ds ~ ~
 615 .       ds ? ?
 616 .       ds ! !
 617 .       ds /
 618 .       ds q
 619 .\}
 620 .if t \{\
 621 .       ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
 622 .       ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
 623 .       ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
 624 .       ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
 625 .       ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
 626 .       ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
 627 .       ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
 628 .       ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
 629 .       ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
 630 .\}
 631 .       \" troff and (daisy-wheel) nroff accents
 632 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
 633 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
 634 .ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
 635 .ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
 636 .ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
 637 .ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
 638 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
 639 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
 640 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
 641 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
 642 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
 643 .ds ae a\h'-(\w'a'u*4/10)'e
 644 .ds Ae A\h'-(\w'A'u*4/10)'E
 645 .ds oe o\h'-(\w'o'u*4/10)'e
 646 .ds Oe O\h'-(\w'O'u*4/10)'E
 647 .       \" corrections for vroff
 648 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
 649 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
 650 .       \" for low resolution devices (crt and lpr)
 651 .if \n(.H>23 .if \n(.V>19 \
 652 \{\
 653 .       ds : e
 654 .       ds 8 ss
 655 .       ds v \h'-1'\o'\(aa\(ga'
 656 .       ds _ \h'-1'^
 657 .       ds . \h'-1'.
 658 .       ds 3 3
 659 .       ds o a
 660 .       ds d- d\h'-1'\(ga
 661 .       ds D- D\h'-1'\(hy
 662 .       ds th \o'bp'
 663 .       ds Th \o'LP'
 664 .       ds ae ae
 665 .       ds Ae AE
 666 .       ds oe oe
 667 .       ds Oe OE
 668 .\}
 669 .rm #[ #] #H #V #F C
 670 END
 671
 672 $indent = 0;
 673
 674 $begun = "";
 675
 676 while (<>) {
 677     if ($cutting) {
 678         next unless /^=/;
 679         $cutting = 0;
 680     }
 681     if ($begun) {
 682         if (/^=end\s+$begun/) {
 683             $begun = "";
 684         }
 685         elsif ($begun =~ /^(roff|man)$/) {
 686             print STDOUT $_;
 687         }
 688         next;
 689     }
 690     chomp;
 691
 692     # Translate verbatim paragraph
 693
 694     if (/^\s/) {
 695         @lines = split(/\n/);
 696         for (@lines) {
 697             1 while s
 698                 {^( [^\t]* ) \t ( \t* ) }
 699                 { $1 . ' ' x (8 - (length($1)%8) + 8 * (length($2))) }ex;
 700             s/\\/\\e/g;
 701             s/\A/\\&/s;
 702         }
 703         $lines = @lines;
 704         makespace() unless $verbatim++;
 705         print ".Vb $lines\n";
 706         print join("\n", @lines), "\n";
 707         print ".Ve\n";
 708         $needspace = 0;
 709         next;
 710     }
 711
 712     $verbatim = 0;
 713
 714     if (/^=for\s+(\S+)\s*/s) {
 715         if ($1 eq "man" or $1 eq "roff") {
 716             print STDOUT $',"\n\n";
 717         } else {
 718             # ignore unknown for
 719         }
 720         next;
 721     }
 722     elsif (/^=begin\s+(\S+)\s*/s) {
 723         $begun = $1;
 724         if ($1 eq "man" or $1 eq "roff") {
 725             print STDOUT $'."\n\n";
 726         }
 727         next;
 728     }
 729
 730     # check for things that'll hosed our noremap scheme; affects $_
 731     init_noremap();
 732
 733     if (!/^=item/) {
 734
 735         # trofficate backslashes; must do it before what happens below
 736         s/\\/noremap('\\e')/ge;
 737
 738         # protect leading periods and quotes against *roff
 739         # mistaking them for directives
 740         s/^(?:[A-Z]<)?[.']/\\&$&/gm;
 741
 742         # first hide the escapes in case we need to
 743         # intuit something and get it wrong due to fmting
 744
 745         1 while s/([A-Z]<[^<>]*>)/noremap($1)/ge;
 746
 747         # func() is a reference to a perl function
 748         s{
 749             \b
 750             (
 751                 [:\w]+ \(\)
 752             )
 753         } {I<$1>}gx;
 754
 755         # func(n) is a reference to a perl function or a man page
 756         s{
 757             ([:\w]+)
 758             (
 759                 \( [^\051]+ \)
 760             )
 761         } {I<$1>\\|$2}gx;
 762
 763         # convert simple variable references
 764         s/(\s+)([\$\@%][\w:]+)(?!\()/${1}C<$2>/g;
 765
 766         if (m{ (
 767                     [\-\w]+
 768                     \(
 769                         [^\051]*?
 770                         [\@\$,]
 771                         [^\051]*?
 772                     \)
 773                 )
 774             }x && $` !~ /([LCI]<[^<>]*|-)$/ && !/^=\w/)
 775         {
 776             warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [LCI]<$1>\n";
 777             $oops++;
 778         }
 779
 780         while (/(-[a-zA-Z])\b/g && $` !~ /[\w\-]$/) {
 781             warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [CB]<$1>\n";
 782             $oops++;
 783         }
 784
 785         # put it back so we get the <> processed again;
 786         clear_noremap(0); # 0 means leave the E's
 787
 788     } else {
 789         # trofficate backslashes
 790         s/\\/noremap('\\e')/ge;
 791
 792     }
 793
 794     # need to hide E<> first; they're processed in clear_noremap
 795     s/(E<[^<>]+>)/noremap($1)/ge;
 796
 797
 798     $maxnest = 10;
 799     while ($maxnest-- && /[A-Z]</) {
 800
 801         # can't do C font here
 802         s/([BI])<([^<>]*)>/font($1) . $2 . font('R')/eg;
 803
 804         # files and filelike refs in italics
 805         s/F<([^<>]*)>/I<$1>/g;
 806
 807         # no break -- usually we want C<> for this
 808         s/S<([^<>]*)>/nobreak($1)/eg;
 809
 810         # LREF: a la HREF L<show this text|man/section>
 811         s:L<([^|>]+)\|[^>]+>:$1:g;
 812
 813         # LREF: a manpage(3f)
 814         s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the I<$1>$2 manpage:g;
 815
 816         # LREF: an =item on another manpage
 817         s{
 818             L<
 819                 ([^/]+)
 820                 /
 821                 (
 822                     [:\w]+
 823                     (\(\))?
 824                 )
 825             >
 826         } {the C<$2> entry in the I<$1> manpage}gx;
 827
 828         # LREF: an =item on this manpage
 829         s{
 830            ((?:
 831             L<
 832                 /
 833                 (
 834                     [:\w]+
 835                     (\(\))?
 836                 )
 837             >
 838             (,?\s+(and\s+)?)?
 839           )+)
 840         } { internal_lrefs($1) }gex;
 841
 842         # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
 843         # the "func" can disambiguate
 844         s{
 845             L<
 846                 (?:
 847                     ([a-zA-Z]\S+?) /
 848                 )?
 849                 "?(.*?)"?
 850             >
 851         }{
 852             do {
 853                 $1      # if no $1, assume it means on this page.
 854                     ?  "the section on I<$2> in the I<$1> manpage"
 855                     :  "the section on I<$2>"
 856             }
 857         }gesx; # s in case it goes over multiple lines, so . matches \n
 858
 859         s/Z<>/\\&/g;
 860
 861         # comes last because not subject to reprocessing
 862         s/C<([^<>]*)>/noremap("${CFont_embed}${1}\\fR")/eg;
 863     }
 864
 865     if (s/^=//) {
 866         $needspace = 0;         # Assume this.
 867
 868         s/\n/ /g;
 869
 870         ($Cmd, $_) = split(' ', $_, 2);
 871
 872         $dotlevel = 1;
 873         if ($Cmd eq 'head1') {
 874            $dotlevel = 1;
 875         }
 876         elsif ($Cmd eq 'head2') {
 877            $dotlevel = 1;
 878         }
 879         elsif ($Cmd eq 'item') {
 880            $dotlevel = 2;
 881         }
 882
 883         if (defined $_) {
 884             &escapes($dotlevel);
 885             s/"/""/g;
 886         }
 887
 888         clear_noremap(1);
 889
 890         if ($Cmd eq 'cut') {
 891             $cutting = 1;
 892         }
 893         elsif ($Cmd eq 'head1') {
 894             s/\s+$//;
 895             delete $wanna_see{$_} if exists $wanna_see{$_};
 896             print qq{.SH "$_"\n};
 897       push(@Indices, qq{.IX Header "$_"\n});
 898         }
 899         elsif ($Cmd eq 'head2') {
 900             print qq{.Sh "$_"\n};
 901       push(@Indices, qq{.IX Subsection "$_"\n});
 902         }
 903         elsif ($Cmd eq 'over') {
 904             push(@indent,$indent);
 905             $indent += ($_ + 0) || 5;
 906         }
 907         elsif ($Cmd eq 'back') {
 908             $indent = pop(@indent);
 909             warn "$0: Unmatched =back in paragraph $. of $ARGV\n" unless defined $indent;
 910             $needspace = 1;
 911         }
 912         elsif ($Cmd eq 'item') {
 913             s/^\*( |$)/\\(bu$1/g;
 914             # if you know how to get ":s please do
 915             s/\\\*\(L"([^"]+?)\\\*\(R"/'$1'/g;
 916             s/\\\*\(L"([^"]+?)""/'$1'/g;
 917             s/[^"]""([^"]+?)""[^"]/'$1'/g;
 918             # here do something about the $" in perlvar?
 919             print STDOUT qq{.Ip "$_" $indent\n};
 920       push(@Indices, qq{.IX Item "$_"\n});
 921         }
 922         elsif ($Cmd eq 'pod') {
 923             # this is just a comment
 924         }
 925         else {
 926             warn "$0: Unrecognized pod directive in paragraph $. of $ARGV: $Cmd\n";
 927         }
 928     }
 929     else {
 930         if ($needspace) {
 931             &makespace;
 932         }
 933         &escapes(0);
 934         clear_noremap(1);
 935         print $_, "\n";
 936         $needspace = 1;
 937     }
 938 }
 939
 940 print <<"END";
 941
 942 .rn }` ''
 943 END
 944
 945 if (%wanna_see && !$lax) {
 946     @missing = keys %wanna_see;
 947     warn "$0: $Filename is missing required section"
 948         .  (@missing > 1 && "s")
 949         .  ": @missing\n";
 950     $oops++;
 951 }
 952
 953 foreach (@Indices) { print "$_\n"; }
 954
 955 exit;
 956 #exit ($oops != 0);
 957
 958 #########################################################################
 959
 960 sub nobreak {
 961     my $string = shift;
 962     $string =~ s/ /\\ /g;
 963     $string;
 964 }
 965
 966 sub escapes {
 967     my $indot = shift;
 968
 969     s/X<(.*?)>/mkindex($1)/ge;
 970
 971     # translate the minus in foo-bar into foo\-bar for roff
 972     s/([^0-9a-z-])-([^-])/$1\\-$2/g;
 973
 974     # make -- into the string version \*(-- (defined above)
 975     s/\b--\b/\\*(--/g;
 976     s/"--([^"])/"\\*(--$1/g;  # should be a better way
 977     s/([^"])--"/$1\\*(--"/g;
 978
 979     # fix up quotes; this is somewhat tricky
 980     my $dotmacroL = 'L';
 981     my $dotmacroR = 'R';
 982     if ( $indot == 1 ) {
 983         $dotmacroL = 'M';
 984         $dotmacroR = 'S';
 985     }
 986     elsif ( $indot >= 2 ) {
 987         $dotmacroL = 'N';
 988         $dotmacroR = 'T';
 989     }
 990     if (!/""/) {
 991         s/(^|\s)(['"])/noremap("$1\\*($dotmacroL$2")/ge;
 992         s/(['"])($|[\-\s,;\\!?.])/noremap("\\*($dotmacroR$1$2")/ge;
 993     }
 994
 995     #s/(?!")(?:.)--(?!")(?:.)/\\*(--/g;
 996     #s/(?:(?!")(?:.)--(?:"))|(?:(?:")--(?!")(?:.))/\\*(--/g;
 997
 998
 999     # make sure that func() keeps a bit a space tween the parens
1000     ### s/\b\(\)/\\|()/g;
1001     ### s/\b\(\)/(\\|)/g;
1002
1003     # make C++ into \*C+, which is a squinched version (defined above)
1004     s/\bC\+\+/\\*(C+/g;
1005
1006     # make double underbars have a little tiny space between them
1007     s/__/_\\|_/g;
1008
1009     # PI goes to \*(PI (defined above)
1010     s/\bPI\b/noremap('\\*(PI')/ge;
1011
1012     # make all caps a teeny bit smaller, but don't muck with embedded code literals
1013     my $hidCFont = font('C');
1014     if ($Cmd !~ /^head1/) { # SH already makes smaller
1015         # /g isn't enough; 1 while or we'll be off
1016
1017 #       1 while s{
1018 #           (?!$hidCFont)(..|^.|^)
1019 #           \b
1020 #           (
1021 #               [A-Z][\/A-Z+:\-\d_$.]+
1022 #           )
1023 #           (s?)
1024 #           \b
1025 #       } {$1\\s-1$2\\s0}gmox;
1026
1027         1 while s{
1028             (?!$hidCFont)(..|^.|^)
1029             (
1030                 \b[A-Z]{2,}[\/A-Z+:\-\d_\$]*\b
1031             )
1032         } {
1033             $1 . noremap( '\\s-1' .  $2 . '\\s0' )
1034         }egmox;
1035
1036     }
1037 }
1038
1039 # make troff just be normal, but make small nroff get quoted
1040 # decided to just put the quotes in the text; sigh;
1041 sub ccvt {
1042     local($_,$prev) = @_;
1043     noremap(qq{.CQ "$_" \n\\&});
1044 }
1045
1046 sub makespace {
1047     if ($indent) {
1048         print ".Sp\n";
1049     }
1050     else {
1051         print ".PP\n";
1052     }
1053 }
1054
1055 sub mkindex {
1056     my ($entry) = @_;
1057     my @entries = split m:\s*/\s*:, $entry;
1058     push @Indices, ".IX Xref " . join ' ', map {qq("$_")} @entries;
1059     return '';
1060 }
1061
1062 sub font {
1063     local($font) = shift;
1064     return '\\f' . noremap($font);
1065 }
1066
1067 sub noremap {
1068     local($thing_to_hide) = shift;
1069     $thing_to_hide =~ tr/\000-\177/\200-\377/;
1070     return $thing_to_hide;
1071 }
1072
1073 sub init_noremap {
1074         # escape high bit characters in input stream
1075         s/([\200-\377])/"E<".ord($1).">"/ge;
1076 }
1077
1078 sub clear_noremap {
1079     my $ready_to_print = $_[0];
1080
1081     tr/\200-\377/\000-\177/;
1082
1083     # trofficate backslashes
1084     # s/(?!\\e)(?:..|^.|^)\\/\\e/g;
1085
1086     # now for the E<>s, which have been hidden until now
1087     # otherwise the interative \w<> processing would have
1088     # been hosed by the E<gt>
1089     s {
1090             E<
1091             (
1092                 ( \d + )
1093                 | ( [A-Za-z]+ )
1094             )
1095             >
1096     } {
1097          do {
1098              defined $2
1099                 ? chr($2)
1100                 :
1101              exists $HTML_Escapes{$3}
1102                 ? do { $HTML_Escapes{$3} }
1103                 : do {
1104                     warn "$0: Unknown escape in paragraph $. of $ARGV: ``$&''\n";
1105                     "E<$1>";
1106                 }
1107          }
1108     }egx if $ready_to_print;
1109 }
1110
1111 sub internal_lrefs {
1112     local($_) = shift;
1113     local $trailing_and = s/and\s+$// ? "and " : "";
1114
1115     s{L</([^>]+)>}{$1}g;
1116     my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
1117     my $retstr = "the ";
1118     my $i;
1119     for ($i = 0; $i <= $#items; $i++) {
1120         $retstr .= "C<$items[$i]>";
1121         $retstr .= ", " if @items > 2 && $i != $#items;
1122         $retstr .= " and " if $i+2 == @items;
1123     }
1124
1125     $retstr .= " entr" . ( @items > 1  ? "ies" : "y" )
1126             .  " elsewhere in this document "; # terminal space to avoid words running together (pattern used strips terminal spaces)
1127     $retstr .=  $trailing_and;
1128
1129     return $retstr;
1130
1131 }
1132
1133 BEGIN {
1134 %HTML_Escapes = (
1135     'amp'       =>      '&',    #   ampersand
1136     'lt'        =>      '<',    #   left chevron, less-than
1137     'gt'        =>      '>',    #   right chevron, greater-than
1138     'quot'      =>      '"',    #   double quote
1139
1140     "Aacute"    =>      "A\\*'",        #   capital A, acute accent
1141     "aacute"    =>      "a\\*'",        #   small a, acute accent
1142     "Acirc"     =>      "A\\*^",        #   capital A, circumflex accent
1143     "acirc"     =>      "a\\*^",        #   small a, circumflex accent
1144     "AElig"     =>      '\*(AE',        #   capital AE diphthong (ligature)
1145     "aelig"     =>      '\*(ae',        #   small ae diphthong (ligature)
1146     "Agrave"    =>      "A\\*`",        #   capital A, grave accent
1147     "agrave"    =>      "A\\*`",        #   small a, grave accent
1148     "Aring"     =>      'A\\*o',        #   capital A, ring
1149     "aring"     =>      'a\\*o',        #   small a, ring
1150     "Atilde"    =>      'A\\*~',        #   capital A, tilde
1151     "atilde"    =>      'a\\*~',        #   small a, tilde
1152     "Auml"      =>      'A\\*:',        #   capital A, dieresis or umlaut mark
1153     "auml"      =>      'a\\*:',        #   small a, dieresis or umlaut mark
1154     "Ccedil"    =>      'C\\*,',        #   capital C, cedilla
1155     "ccedil"    =>      'c\\*,',        #   small c, cedilla
1156     "Eacute"    =>      "E\\*'",        #   capital E, acute accent
1157     "eacute"    =>      "e\\*'",        #   small e, acute accent
1158     "Ecirc"     =>      "E\\*^",        #   capital E, circumflex accent
1159     "ecirc"     =>      "e\\*^",        #   small e, circumflex accent
1160     "Egrave"    =>      "E\\*`",        #   capital E, grave accent
1161     "egrave"    =>      "e\\*`",        #   small e, grave accent
1162     "ETH"       =>      '\\*(D-',       #   capital Eth, Icelandic
1163     "eth"       =>      '\\*(d-',       #   small eth, Icelandic
1164     "Euml"      =>      "E\\*:",        #   capital E, dieresis or umlaut mark
1165     "euml"      =>      "e\\*:",        #   small e, dieresis or umlaut mark
1166     "Iacute"    =>      "I\\*'",        #   capital I, acute accent
1167     "iacute"    =>      "i\\*'",        #   small i, acute accent
1168     "Icirc"     =>      "I\\*^",        #   capital I, circumflex accent
1169     "icirc"     =>      "i\\*^",        #   small i, circumflex accent
1170     "Igrave"    =>      "I\\*`",        #   capital I, grave accent
1171     "igrave"    =>      "i\\*`",        #   small i, grave accent
1172     "Iuml"      =>      "I\\*:",        #   capital I, dieresis or umlaut mark
1173     "iuml"      =>      "i\\*:",        #   small i, dieresis or umlaut mark
1174     "Ntilde"    =>      'N\*~',         #   capital N, tilde
1175     "ntilde"    =>      'n\*~',         #   small n, tilde
1176     "Oacute"    =>      "O\\*'",        #   capital O, acute accent
1177     "oacute"    =>      "o\\*'",        #   small o, acute accent
1178     "Ocirc"     =>      "O\\*^",        #   capital O, circumflex accent
1179     "ocirc"     =>      "o\\*^",        #   small o, circumflex accent
1180     "Ograve"    =>      "O\\*`",        #   capital O, grave accent
1181     "ograve"    =>      "o\\*`",        #   small o, grave accent
1182     "Oslash"    =>      "O\\*/",        #   capital O, slash
1183     "oslash"    =>      "o\\*/",        #   small o, slash
1184     "Otilde"    =>      "O\\*~",        #   capital O, tilde
1185     "otilde"    =>      "o\\*~",        #   small o, tilde
1186     "Ouml"      =>      "O\\*:",        #   capital O, dieresis or umlaut mark
1187     "ouml"      =>      "o\\*:",        #   small o, dieresis or umlaut mark
1188     "szlig"     =>      '\*8',          #   small sharp s, German (sz ligature)
1189     "THORN"     =>      '\\*(Th',       #   capital THORN, Icelandic
1190     "thorn"     =>      '\\*(th',,      #   small thorn, Icelandic
1191     "Uacute"    =>      "U\\*'",        #   capital U, acute accent
1192     "uacute"    =>      "u\\*'",        #   small u, acute accent
1193     "Ucirc"     =>      "U\\*^",        #   capital U, circumflex accent
1194     "ucirc"     =>      "u\\*^",        #   small u, circumflex accent
1195     "Ugrave"    =>      "U\\*`",        #   capital U, grave accent
1196     "ugrave"    =>      "u\\*`",        #   small u, grave accent
1197     "Uuml"      =>      "U\\*:",        #   capital U, dieresis or umlaut mark
1198     "uuml"      =>      "u\\*:",        #   small u, dieresis or umlaut mark
1199     "Yacute"    =>      "Y\\*'",        #   capital Y, acute accent
1200     "yacute"    =>      "y\\*'",        #   small y, acute accent
1201     "yuml"      =>      "y\\*:",        #   small y, dieresis or umlaut mark
1202 );
1203 }
1204
1205 !NO!SUBS!
1206
1207 close OUT or die "Can't close $file: $!";
1208 chmod 0755, $file or die "Can't reset permissions for $file: $!\n";
1209 exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';
1210 chdir $origdir;