pod/pod2man.PL

   1 #!/usr/local/bin/perl
   2
   3 use Config;
   4 use File::Basename qw(&basename &dirname);
   5 use Cwd;
   6
   7 # List explicitly here the variables you want Configure to
   8 # generate.  Metaconfig only looks for shell variables, so you
   9 # have to mention them as if they were shell variables, not
  10 # %Config entries.  Thus you write
  11 #  $startperl
  12 #  $man3ext
  13 # to ensure Configure will look for $Config{startperl}.
  14
  15 # This forces PL files to create target in same directory as PL file.
  16 # This is so that make depend always knows where to find PL derivatives.
  17 $origdir = cwd;
  18 chdir dirname($0);
  19 $file = basename($0, '.PL');
  20 $file .= '.com' if $^O eq 'VMS';
  21
  22 open OUT,">$file" or die "Can't create $file: $!";
  23
  24 print "Extracting $file (with variable substitutions)\n";
  25
  26 # In this section, perl variables will be expanded during extraction.
  27 # You can use $Config{...} to use Configure variables.
  28
  29 print OUT <<"!GROK!THIS!";
  30 $Config{startperl}
  31     eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}'
  32         if \$running_under_some_shell;
  33
  34 \$DEF_PM_SECTION = '$Config{man3ext}' || '3';
  35 !GROK!THIS!
  36
  37 # In the following, perl variables are not expanded during extraction.
  38
  39 print OUT <<'!NO!SUBS!';
  40
  41 =head1 NAME
  42
  43 pod2man - translate embedded Perl pod directives into man pages
  44
  45 =head1 SYNOPSIS
  46
  47 B<pod2man>
  48 [ B<--section=>I<manext> ]
  49 [ B<--release=>I<relpatch> ]
  50 [ B<--center=>I<string> ]
  51 [ B<--date=>I<string> ]
  52 [ B<--fixed=>I<font> ]
  53 [ B<--official> ]
  54 [ B<--lax> ]
  55 I<inputfile>
  56
  57 =head1 DESCRIPTION
  58
  59 B<pod2man> converts its input file containing embedded pod directives (see
  60 L<perlpod>) into nroff source suitable for viewing with nroff(1) or
  61 troff(1) using the man(7) macro set.
  62
  63 Besides the obvious pod conversions, B<pod2man> also takes care of
  64 func(), func(n), and simple variable references like $foo or @bar so
  65 you don't have to use code escapes for them; complex expressions like
  66 C<$fred{'stuff'}> will still need to be escaped, though.  Other nagging
  67 little roffish things that it catches include translating the minus in
  68 something like foo-bar, making a long dash--like this--into a real em
  69 dash, fixing up "paired quotes", putting a little space after the
  70 parens in something like func(), making C++ and PI look right, making
  71 double underbars have a little tiny space between them, making ALLCAPS
  72 a teeny bit smaller in troff(1), and escaping backslashes so you don't
  73 have to.
  74
  75 =head1 OPTIONS
  76
  77 =over 8
  78
  79 =item center
  80
  81 Set the centered header to a specific string.  The default is
  82 "User Contributed Perl Documentation", unless the C<--official> flag is
  83 given, in which case the default is "Perl Programmers Reference Guide".
  84
  85 =item date
  86
  87 Set the left-hand footer string to this value.  By default,
  88 the modification date of the input file will be used.
  89
  90 =item fixed
  91
  92 The fixed font to use for code refs.  Defaults to CW.
  93
  94 =item official
  95
  96 Set the default header to indicate that this page is of
  97 the standard release in case C<--center> is not given.
  98
  99 =item release
 100
 101 Set the centered footer.  By default, this is the current
 102 perl release.
 103
 104 =item section
 105
 106 Set the section for the C<.TH> macro.  The standard conventions on
 107 sections are to use 1 for user commands,  2 for system calls, 3 for
 108 functions, 4 for devices, 5 for file formats, 6 for games, 7 for
 109 miscellaneous information, and 8 for administrator commands.  This works
 110 best if you put your Perl man pages in a separate tree, like
 111 F</usr/local/perl/man/>.  By default, section 1 will be used
 112 unless the file ends in F<.pm> in which case section 3 will be selected.
 113
 114 =item lax
 115
 116 Don't complain when required sections aren't present.
 117
 118 =back
 119
 120 =head1 Anatomy of a Proper Man Page
 121
 122 For those not sure of the proper layout of a man page, here's
 123 an example of the skeleton of a proper man page.  Head of the
 124 major headers should be setout as a C<=head1> directive, and
 125 are historically written in the rather startling ALL UPPER CASE
 126 format, although this is not mandatory.
 127 Minor headers may be included using C<=head2>, and are
 128 typically in mixed case.
 129
 130 =over 10
 131
 132 =item NAME
 133
 134 Mandatory section; should be a comma-separated list of programs or
 135 functions documented by this podpage, such as:
 136
 137     foo, bar - programs to do something
 138
 139 =item SYNOPSIS
 140
 141 A short usage summary for programs and functions, which
 142 may someday be deemed mandatory.
 143
 144 =item DESCRIPTION
 145
 146 Long drawn out discussion of the program.  It's a good idea to break this
 147 up into subsections using the C<=head2> directives, like
 148
 149     =head2 A Sample Subection
 150
 151     =head2 Yet Another Sample Subection
 152
 153 =item OPTIONS
 154
 155 Some people make this separate from the description.
 156
 157 =item RETURN VALUE
 158
 159 What the program or function returns if successful.
 160
 161 =item ERRORS
 162
 163 Exceptions, return codes, exit stati, and errno settings.
 164
 165 =item EXAMPLES
 166
 167 Give some example uses of the program.
 168
 169 =item ENVIRONMENT
 170
 171 Envariables this program might care about.
 172
 173 =item FILES
 174
 175 All files used by the program.  You should probably use the FE<lt>E<gt>
 176 for these.
 177
 178 =item SEE ALSO
 179
 180 Other man pages to check out, like man(1), man(7), makewhatis(8), or catman(8).
 181
 182 =item NOTES
 183
 184 Miscellaneous commentary.
 185
 186 =item CAVEATS
 187
 188 Things to take special care with; sometimes called WARNINGS.
 189
 190 =item DIAGNOSTICS
 191
 192 All possible messages the program can print out--and
 193 what they mean.
 194
 195 =item BUGS
 196
 197 Things that are broken or just don't work quite right.
 198
 199 =item RESTRICTIONS
 200
 201 Bugs you don't plan to fix :-)
 202
 203 =item AUTHOR
 204
 205 Who wrote it (or AUTHORS if multiple).
 206
 207 =item HISTORY
 208
 209 Programs derived from other sources sometimes have this, or
 210 you might keep a modification log here.
 211
 212 =back
 213
 214 =head1 EXAMPLES
 215
 216     pod2man program > program.1
 217     pod2man some_module.pm > /usr/perl/man/man3/some_module.3
 218     pod2man --section=7 note.pod > note.7
 219
 220 =head1 DIAGNOSTICS
 221
 222 The following diagnostics are generated by B<pod2man>.  Items
 223 marked "(W)" are non-fatal, whereas the "(F)" errors will cause
 224 B<pod2man> to immediately exit with a non-zero status.
 225
 226 =over 4
 227
 228 =item bad option in paragraph %d of %s: ``%s'' should be [%s]<%s>
 229
 230 (W) If you start include an option, you should set it off
 231 as bold, italic, or code.
 232
 233 =item can't open %s: %s
 234
 235 (F) The input file wasn't available for the given reason.
 236
 237 =item Improper man page - no dash in NAME header in paragraph %d of %s
 238
 239 (W) The NAME header did not have an isolated dash in it.  This is
 240 considered important.
 241
 242 =item Invalid man page - no NAME line in %s
 243
 244 (F) You did not include a NAME header, which is essential.
 245
 246 =item roff font should be 1 or 2 chars, not `%s'  (F)
 247
 248 (F) The font specified with the C<--fixed> option was not
 249 a one- or two-digit roff font.
 250
 251 =item %s is missing required section: %s
 252
 253 (W) Required sections include NAME, DESCRIPTION, and if you're
 254 using a section starting with a 3, also a SYNOPSIS.  Actually,
 255 not having a NAME is a fatal.
 256
 257 =item Unknown escape: %s in %s
 258
 259 (W) An unknown HTML entity (probably for an 8-bit character) was given via
 260 a C<EE<lt>E<gt>> directive.  Besides amp, lt, gt, and quot, recognized
 261 entities are Aacute, aacute, Acirc, acirc, AElig, aelig, Agrave, agrave,
 262 Aring, aring, Atilde, atilde, Auml, auml, Ccedil, ccedil, Eacute, eacute,
 263 Ecirc, ecirc, Egrave, egrave, ETH, eth, Euml, euml, Iacute, iacute, Icirc,
 264 icirc, Igrave, igrave, Iuml, iuml, Ntilde, ntilde, Oacute, oacute, Ocirc,
 265 ocirc, Ograve, ograve, Oslash, oslash, Otilde, otilde, Ouml, ouml, szlig,
 266 THORN, thorn, Uacute, uacute, Ucirc, ucirc, Ugrave, ugrave, Uuml, uuml,
 267 Yacute, yacute, and yuml.
 268
 269 =item Unmatched =back
 270
 271 (W) You have a C<=back> without a corresponding C<=over>.
 272
 273 =item Unrecognized pod directive: %s
 274
 275 (W) You specified a pod directive that isn't in the known list of
 276 C<=head1>, C<=head2>, C<=item>, C<=over>, C<=back>, or C<=cut>.
 277
 278
 279 =back
 280
 281 =head1 NOTES
 282
 283 If you would like to print out a lot of man page continuously, you
 284 probably want to set the C and D registers to set contiguous page
 285 numbering and even/odd paging, at least on some versions of man(7).
 286 Settting the F register will get you some additional experimental
 287 indexing:
 288
 289     troff -man -rC1 -rD1 -rF1 perl.1 perldata.1 perlsyn.1 ...
 290
 291 The indexing merely outputs messages via C<.tm> for each
 292 major page, section, subsection, item, and any C<XE<lt>E<gt>>
 293 directives.
 294
 295
 296 =head1 RESTRICTIONS
 297
 298 None at this time.
 299
 300 =head1 BUGS
 301
 302 The =over and =back directives don't really work right.  They
 303 take absolute positions instead of offsets, don't nest well, and
 304 making people count is suboptimal in any event.
 305
 306 =head1 AUTHORS
 307
 308 Original prototype by Larry Wall, but so massively hacked over by
 309 Tom Christiansen such that Larry probably doesn't recognize it anymore.
 310
 311 =cut
 312
 313 $/ = "";
 314 $cutting = 1;
 315 @Indices = ();
 316
 317 # We try first to get the version number from a local binary, in case we're
 318 # running an installed version of Perl to produce documentation from an
 319 # uninstalled newer version's pod files.
 320 if ($^O ne 'plan9' and $^O ne 'dos' and $^O ne 'os2' and $^O ne 'MSWin32') {
 321   my $perl = (-x './perl') ? './perl' : ((-x '../perl') ? '../perl' : '');
 322   ($version,$patch) = `$perl -e 'print $]'` =~ /^(\d\.\d{3})(\d{2})?/ if $perl;
 323 }
 324 # No luck; we'll just go with the running Perl's version
 325 ($version,$patch) = $] =~ /^(.{5})(\d{2})?/ unless $version;
 326 $DEF_RELEASE  = "perl $version";
 327 $DEF_RELEASE .= ", patch $patch" if $patch;
 328
 329
 330 sub makedate {
 331     my $secs = shift;
 332     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($secs);
 333     my $mname = (qw{Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec})[$mon];
 334     $year += 1900;
 335     return "$mday/$mname/$year";
 336 }
 337
 338 use Getopt::Long;
 339
 340 $DEF_SECTION = 1;
 341 $DEF_CENTER = "User Contributed Perl Documentation";
 342 $STD_CENTER = "Perl Programmers Reference Guide";
 343 $DEF_FIXED = 'CW';
 344 $DEF_LAX = 0;
 345
 346 sub usage {
 347     warn "$0: @_\n" if @_;
 348     die <<EOF;
 349 usage: $0 [options] podpage
 350 Options are:
 351         --section=manext      (default "$DEF_SECTION")
 352         --release=relpatch    (default "$DEF_RELEASE")
 353         --center=string       (default "$DEF_CENTER")
 354         --date=string         (default "$DEF_DATE")
 355         --fixed=font          (default "$DEF_FIXED")
 356         --official            (default NOT)
 357         --lax                 (default NOT)
 358 EOF
 359 }
 360
 361 $uok = GetOptions( qw(
 362         section=s
 363         release=s
 364         center=s
 365         date=s
 366         fixed=s
 367         official
 368         lax
 369         help));
 370
 371 $DEF_DATE = makedate((stat($ARGV[0]))[9] || time());
 372
 373 usage("Usage error!") unless $uok;
 374 usage() if $opt_help;
 375 usage("Need one and only one podpage argument") unless @ARGV == 1;
 376
 377 $section = $opt_section || ($ARGV[0] =~ /\.pm$/
 378                                 ? $DEF_PM_SECTION : $DEF_SECTION);
 379 $RP = $opt_release || $DEF_RELEASE;
 380 $center = $opt_center || ($opt_official ? $STD_CENTER : $DEF_CENTER);
 381 $lax = $opt_lax || $DEF_LAX;
 382
 383 $CFont = $opt_fixed || $DEF_FIXED;
 384
 385 if (length($CFont) == 2) {
 386     $CFont_embed = "\\f($CFont";
 387 }
 388 elsif (length($CFont) == 1) {
 389     $CFont_embed = "\\f$CFont";
 390 }
 391 else {
 392     die "roff font should be 1 or 2 chars, not `$CFont_embed'";
 393 }
 394
 395 $date = $opt_date || $DEF_DATE;
 396
 397 for (qw{NAME DESCRIPTION}) {
 398 # for (qw{NAME DESCRIPTION AUTHOR}) {
 399     $wanna_see{$_}++;
 400 }
 401 $wanna_see{SYNOPSIS}++ if $section =~ /^3/;
 402
 403
 404 $name = @ARGV ? $ARGV[0] : "<STDIN>";
 405 $Filename = $name;
 406 if ($section =~ /^1/) {
 407     require File::Basename;
 408     $name = uc File::Basename::basename($name);
 409 }
 410 $name =~ s/\.(pod|p[lm])$//i;
 411
 412 # Lose everything up to the first of
 413 #     */lib/*perl*      standard or site_perl module
 414 #     */*perl*/lib      from -D prefix=/opt/perl
 415 #     */*perl*/         random module hierarchy
 416 # which works.
 417 $name =~ s-//+-/-g;
 418 if ($name =~ s-^.*?/lib/[^/]*perl[^/]*/--i
 419         or $name =~ s-^.*?/[^/]*perl[^/]*/lib/--i
 420         or $name =~ s-^.*?/[^/]*perl[^/]*/--i) {
 421     # Lose ^site(_perl)?/.
 422     $name =~ s-^site(_perl)?/--;
 423     # Lose ^arch/.      (XXX should we use Config? Just for archname?)
 424     $name =~ s~^(.*-$^O|$^O-.*)/~~o;
 425     # Lose ^version/.
 426     $name =~ s-^\d+\.\d+/--;
 427 }
 428
 429 # Translate Getopt/Long to Getopt::Long, etc.
 430 $name =~ s(/)(::)g;
 431
 432 if ($name ne 'something') {
 433     FCHECK: {
 434         open(F, "< $ARGV[0]") || die "can't open $ARGV[0]: $!";
 435         while (<F>) {
 436             next unless /^=\b/;
 437             if (/^=head1\s+NAME\s*$/) {  # an /m would forgive mistakes
 438                 $_ = <F>;
 439                 unless (/\s*-+\s+/) {
 440                     $oops++;
 441                     warn "$0: Improper man page - no dash in NAME header in paragraph $. of $ARGV[0]\n"
 442                 } else {
 443                     my @n = split /\s+-+\s+/;
 444                     if (@n != 2) {
 445                         $oops++;
 446                         warn "$0: Improper man page - malformed NAME header in paragraph $. of $ARGV[0]\n"
 447                     }
 448                     else {
 449                         %namedesc = @n;
 450                     }
 451                 }
 452                 last FCHECK;
 453             }
 454             next if /^=cut\b/;  # DB_File and Net::Ping have =cut before NAME
 455             next if /^=pod\b/;  # It is OK to have =pod before NAME
 456             die "$0: Invalid man page - 1st pod line is not NAME in $ARGV[0]\n" unless $lax;
 457         }
 458         die "$0: Invalid man page - no documentation in $ARGV[0]\n" unless $lax;
 459     }
 460     close F;
 461 }
 462
 463 print <<"END";
 464 .rn '' }`
 465 ''' \$RCSfile\$\$Revision\$\$Date\$
 466 '''
 467 ''' \$Log\$
 468 '''
 469 .de Sh
 470 .br
 471 .if t .Sp
 472 .ne 5
 473 .PP
 474 \\fB\\\\\$1\\fR
 475 .PP
 476 ..
 477 .de Sp
 478 .if t .sp .5v
 479 .if n .sp
 480 ..
 481 .de Ip
 482 .br
 483 .ie \\\\n(.\$>=3 .ne \\\\\$3
 484 .el .ne 3
 485 .IP "\\\\\$1" \\\\\$2
 486 ..
 487 .de Vb
 488 .ft $CFont
 489 .nf
 490 .ne \\\\\$1
 491 ..
 492 .de Ve
 493 .ft R
 494
 495 .fi
 496 ..
 497 '''
 498 '''
 499 '''     Set up \\*(-- to give an unbreakable dash;
 500 '''     string Tr holds user defined translation string.
 501 '''     Bell System Logo is used as a dummy character.
 502 '''
 503 .tr \\(*W-|\\(bv\\*(Tr
 504 .ie n \\{\\
 505 .ds -- \\(*W-
 506 .ds PI pi
 507 .if (\\n(.H=4u)&(1m=24u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-12u'-\\" diablo 10 pitch
 508 .if (\\n(.H=4u)&(1m=20u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-8u'-\\" diablo 12 pitch
 509 .ds L" ""
 510 .ds R" ""
 511 '''   \\*(M", \\*(S", \\*(N" and \\*(T" are the equivalent of
 512 '''   \\*(L" and \\*(R", except that they are used on ".xx" lines,
 513 '''   such as .IP and .SH, which do another additional levels of
 514 '''   double-quote interpretation
 515 .ds M" """
 516 .ds S" """
 517 .ds N" """""
 518 .ds T" """""
 519 .ds L' '
 520 .ds R' '
 521 .ds M' '
 522 .ds S' '
 523 .ds N' '
 524 .ds T' '
 525 'br\\}
 526 .el\\{\\
 527 .ds -- \\(em\\|
 528 .tr \\*(Tr
 529 .ds L" ``
 530 .ds R" ''
 531 .ds M" ``
 532 .ds S" ''
 533 .ds N" ``
 534 .ds T" ''
 535 .ds L' `
 536 .ds R' '
 537 .ds M' `
 538 .ds S' '
 539 .ds N' `
 540 .ds T' '
 541 .ds PI \\(*p
 542 'br\\}
 543 END
 544
 545 print <<'END';
 546 .\"     If the F register is turned on, we'll generate
 547 .\"     index entries out stderr for the following things:
 548 .\"             TH      Title
 549 .\"             SH      Header
 550 .\"             Sh      Subsection
 551 .\"             Ip      Item
 552 .\"             X<>     Xref  (embedded
 553 .\"     Of course, you have to process the output yourself
 554 .\"     in some meaninful fashion.
 555 .if \nF \{
 556 .de IX
 557 .tm Index:\\$1\t\\n%\t"\\$2"
 558 ..
 559 .nr % 0
 560 .rr F
 561 .\}
 562 END
 563
 564 print <<"END";
 565 .TH $name $section "$RP" "$date" "$center"
 566 .UC
 567 END
 568
 569 push(@Indices, qq{.IX Title "$name $section"});
 570
 571 while (($name, $desc) = each %namedesc) {
 572     for ($name, $desc) { s/^\s+//; s/\s+$//; }
 573     push(@Indices, qq(.IX Name "$name - $desc"\n));
 574 }
 575
 576 print <<'END';
 577 .if n .hy 0
 578 .if n .na
 579 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
 580 .de CQ          \" put $1 in typewriter font
 581 END
 582 print ".ft $CFont\n";
 583 print <<'END';
 584 'if n "\c
 585 'if t \\&\\$1\c
 586 'if n \\&\\$1\c
 587 'if n \&"
 588 \\&\\$2 \\$3 \\$4 \\$5 \\$6 \\$7
 589 '.ft R
 590 ..
 591 .\" @(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2
 592 .       \" AM - accent mark definitions
 593 .bd B 3
 594 .       \" fudge factors for nroff and troff
 595 .if n \{\
 596 .       ds #H 0
 597 .       ds #V .8m
 598 .       ds #F .3m
 599 .       ds #[ \f1
 600 .       ds #] \fP
 601 .\}
 602 .if t \{\
 603 .       ds #H ((1u-(\\\\n(.fu%2u))*.13m)
 604 .       ds #V .6m
 605 .       ds #F 0
 606 .       ds #[ \&
 607 .       ds #] \&
 608 .\}
 609 .       \" simple accents for nroff and troff
 610 .if n \{\
 611 .       ds ' \&
 612 .       ds ` \&
 613 .       ds ^ \&
 614 .       ds , \&
 615 .       ds ~ ~
 616 .       ds ? ?
 617 .       ds ! !
 618 .       ds /
 619 .       ds q
 620 .\}
 621 .if t \{\
 622 .       ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
 623 .       ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
 624 .       ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
 625 .       ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
 626 .       ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
 627 .       ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
 628 .       ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
 629 .       ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
 630 .       ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
 631 .\}
 632 .       \" troff and (daisy-wheel) nroff accents
 633 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
 634 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
 635 .ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
 636 .ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
 637 .ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
 638 .ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
 639 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
 640 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
 641 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
 642 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
 643 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
 644 .ds ae a\h'-(\w'a'u*4/10)'e
 645 .ds Ae A\h'-(\w'A'u*4/10)'E
 646 .ds oe o\h'-(\w'o'u*4/10)'e
 647 .ds Oe O\h'-(\w'O'u*4/10)'E
 648 .       \" corrections for vroff
 649 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
 650 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
 651 .       \" for low resolution devices (crt and lpr)
 652 .if \n(.H>23 .if \n(.V>19 \
 653 \{\
 654 .       ds : e
 655 .       ds 8 ss
 656 .       ds v \h'-1'\o'\(aa\(ga'
 657 .       ds _ \h'-1'^
 658 .       ds . \h'-1'.
 659 .       ds 3 3
 660 .       ds o a
 661 .       ds d- d\h'-1'\(ga
 662 .       ds D- D\h'-1'\(hy
 663 .       ds th \o'bp'
 664 .       ds Th \o'LP'
 665 .       ds ae ae
 666 .       ds Ae AE
 667 .       ds oe oe
 668 .       ds Oe OE
 669 .\}
 670 .rm #[ #] #H #V #F C
 671 END
 672
 673 $indent = 0;
 674
 675 $begun = "";
 676
 677 # Unrolling [^A-Z>]|[A-Z](?!<) gives:    // MRE pp 165.
 678 my $nonest = '(?:[^A-Z>]*(?:[A-Z](?!<)[^A-Z>]*)*)';
 679
 680 while (<>) {
 681     if ($cutting) {
 682         next unless /^=/;
 683         $cutting = 0;
 684     }
 685     if ($begun) {
 686         if (/^=end\s+$begun/) {
 687             $begun = "";
 688         }
 689         elsif ($begun =~ /^(roff|man)$/) {
 690             print STDOUT $_;
 691         }
 692         next;
 693     }
 694     chomp;
 695
 696     # Translate verbatim paragraph
 697
 698     if (/^\s/) {
 699         @lines = split(/\n/);
 700         for (@lines) {
 701             1 while s
 702                 {^( [^\t]* ) \t ( \t* ) }
 703                 { $1 . ' ' x (8 - (length($1)%8) + 8 * (length($2))) }ex;
 704             s/\\/\\e/g;
 705             s/\A/\\&/s;
 706         }
 707         $lines = @lines;
 708         makespace() unless $verbatim++;
 709         print ".Vb $lines\n";
 710         print join("\n", @lines), "\n";
 711         print ".Ve\n";
 712         $needspace = 0;
 713         next;
 714     }
 715
 716     $verbatim = 0;
 717
 718     if (/^=for\s+(\S+)\s*/s) {
 719         if ($1 eq "man" or $1 eq "roff") {
 720             print STDOUT $',"\n\n";
 721         } else {
 722             # ignore unknown for
 723         }
 724         next;
 725     }
 726     elsif (/^=begin\s+(\S+)\s*/s) {
 727         $begun = $1;
 728         if ($1 eq "man" or $1 eq "roff") {
 729             print STDOUT $'."\n\n";
 730         }
 731         next;
 732     }
 733
 734     # check for things that'll hosed our noremap scheme; affects $_
 735     init_noremap();
 736
 737     if (!/^=item/) {
 738
 739         # trofficate backslashes; must do it before what happens below
 740         s/\\/noremap('\\e')/ge;
 741
 742         # protect leading periods and quotes against *roff
 743         # mistaking them for directives
 744         s/^(?:[A-Z]<)?[.']/\\&$&/gm;
 745
 746         # first hide the escapes in case we need to
 747         # intuit something and get it wrong due to fmting
 748
 749         1 while s/([A-Z]<$nonest>)/noremap($1)/ge;
 750
 751         # func() is a reference to a perl function
 752         s{
 753             \b
 754             (
 755                 [:\w]+ \(\)
 756             )
 757         } {I<$1>}gx;
 758
 759         # func(n) is a reference to a perl function or a man page
 760         s{
 761             ([:\w]+)
 762             (
 763                 \( [^\051]+ \)
 764             )
 765         } {I<$1>\\|$2}gx;
 766
 767         # convert simple variable references
 768         s/(\s+)([\$\@%][\w:]+)(?!\()/${1}C<$2>/g;
 769
 770         if (m{ (
 771                     [\-\w]+
 772                     \(
 773                         [^\051]*?
 774                         [\@\$,]
 775                         [^\051]*?
 776                     \)
 777                 )
 778             }x && $` !~ /([LCI]<[^<>]*|-)$/ && !/^=\w/)
 779         {
 780             warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [LCI]<$1>\n";
 781             $oops++;
 782         }
 783
 784         while (/(-[a-zA-Z])\b/g && $` !~ /[\w\-]$/) {
 785             warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [CB]<$1>\n";
 786             $oops++;
 787         }
 788
 789         # put it back so we get the <> processed again;
 790         clear_noremap(0); # 0 means leave the E's
 791
 792     } else {
 793         # trofficate backslashes
 794         s/\\/noremap('\\e')/ge;
 795
 796     }
 797
 798     # need to hide E<> first; they're processed in clear_noremap
 799     s/(E<[^<>]+>)/noremap($1)/ge;
 800
 801
 802     $maxnest = 10;
 803     while ($maxnest-- && /[A-Z]</) {
 804
 805         # can't do C font here
 806         s/([BI])<($nonest)>/font($1) . $2 . font('R')/eg;
 807
 808         # files and filelike refs in italics
 809         s/F<($nonest)>/I<$1>/g;
 810
 811         # no break -- usually we want C<> for this
 812         s/S<($nonest)>/nobreak($1)/eg;
 813
 814         # LREF: a la HREF L<show this text|man/section>
 815         s:L<([^|>]+)\|[^>]+>:$1:g;
 816
 817         # LREF: a manpage(3f)
 818         s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the I<$1>$2 manpage:g;
 819
 820         # LREF: an =item on another manpage
 821         s{
 822             L<
 823                 ([^/]+)
 824                 /
 825                 (
 826                     [:\w]+
 827                     (\(\))?
 828                 )
 829             >
 830         } {the C<$2> entry in the I<$1> manpage}gx;
 831
 832         # LREF: an =item on this manpage
 833         s{
 834            ((?:
 835             L<
 836                 /
 837                 (
 838                     [:\w]+
 839                     (\(\))?
 840                 )
 841             >
 842             (,?\s+(and\s+)?)?
 843           )+)
 844         } { internal_lrefs($1) }gex;
 845
 846         # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
 847         # the "func" can disambiguate
 848         s{
 849             L<
 850                 (?:
 851                     ([a-zA-Z]\S+?) /
 852                 )?
 853                 "?(.*?)"?
 854             >
 855         }{
 856             do {
 857                 $1      # if no $1, assume it means on this page.
 858                     ?  "the section on I<$2> in the I<$1> manpage"
 859                     :  "the section on I<$2>"
 860             }
 861         }gesx; # s in case it goes over multiple lines, so . matches \n
 862
 863         s/Z<>/\\&/g;
 864
 865         # comes last because not subject to reprocessing
 866         s/C<($nonest)>/noremap("${CFont_embed}${1}\\fR")/eg;
 867     }
 868
 869     if (s/^=//) {
 870         $needspace = 0;         # Assume this.
 871
 872         s/\n/ /g;
 873
 874         ($Cmd, $_) = split(' ', $_, 2);
 875
 876         $dotlevel = 1;
 877         if ($Cmd eq 'head1') {
 878            $dotlevel = 1;
 879         }
 880         elsif ($Cmd eq 'head2') {
 881            $dotlevel = 1;
 882         }
 883         elsif ($Cmd eq 'item') {
 884            $dotlevel = 2;
 885         }
 886
 887         if (defined $_) {
 888             &escapes($dotlevel);
 889             s/"/""/g;
 890         }
 891
 892         clear_noremap(1);
 893
 894         if ($Cmd eq 'cut') {
 895             $cutting = 1;
 896         }
 897         elsif ($Cmd eq 'head1') {
 898             s/\s+$//;
 899             delete $wanna_see{$_} if exists $wanna_see{$_};
 900             print qq{.SH "$_"\n};
 901       push(@Indices, qq{.IX Header "$_"\n});
 902         }
 903         elsif ($Cmd eq 'head2') {
 904             print qq{.Sh "$_"\n};
 905       push(@Indices, qq{.IX Subsection "$_"\n});
 906         }
 907         elsif ($Cmd eq 'over') {
 908             push(@indent,$indent);
 909             $indent += ($_ + 0) || 5;
 910         }
 911         elsif ($Cmd eq 'back') {
 912             $indent = pop(@indent);
 913             warn "$0: Unmatched =back in paragraph $. of $ARGV\n" unless defined $indent;
 914             $needspace = 1;
 915         }
 916         elsif ($Cmd eq 'item') {
 917             s/^\*( |$)/\\(bu$1/g;
 918             # if you know how to get ":s please do
 919             s/\\\*\(L"([^"]+?)\\\*\(R"/'$1'/g;
 920             s/\\\*\(L"([^"]+?)""/'$1'/g;
 921             s/[^"]""([^"]+?)""[^"]/'$1'/g;
 922             # here do something about the $" in perlvar?
 923             print STDOUT qq{.Ip "$_" $indent\n};
 924       push(@Indices, qq{.IX Item "$_"\n});
 925         }
 926         elsif ($Cmd eq 'pod') {
 927             # this is just a comment
 928         }
 929         else {
 930             warn "$0: Unrecognized pod directive in paragraph $. of $ARGV: $Cmd\n";
 931         }
 932     }
 933     else {
 934         if ($needspace) {
 935             &makespace;
 936         }
 937         &escapes(0);
 938         clear_noremap(1);
 939         print $_, "\n";
 940         $needspace = 1;
 941     }
 942 }
 943
 944 print <<"END";
 945
 946 .rn }` ''
 947 END
 948
 949 if (%wanna_see && !$lax) {
 950     @missing = keys %wanna_see;
 951     warn "$0: $Filename is missing required section"
 952         .  (@missing > 1 && "s")
 953         .  ": @missing\n";
 954     $oops++;
 955 }
 956
 957 foreach (@Indices) { print "$_\n"; }
 958
 959 exit;
 960 #exit ($oops != 0);
 961
 962 #########################################################################
 963
 964 sub nobreak {
 965     my $string = shift;
 966     $string =~ s/ /\\ /g;
 967     $string;
 968 }
 969
 970 sub escapes {
 971     my $indot = shift;
 972
 973     s/X<(.*?)>/mkindex($1)/ge;
 974
 975     # translate the minus in foo-bar into foo\-bar for roff
 976     s/([^0-9a-z-])-([^-])/$1\\-$2/g;
 977
 978     # make -- into the string version \*(-- (defined above)
 979     s/\b--\b/\\*(--/g;
 980     s/"--([^"])/"\\*(--$1/g;  # should be a better way
 981     s/([^"])--"/$1\\*(--"/g;
 982
 983     # fix up quotes; this is somewhat tricky
 984     my $dotmacroL = 'L';
 985     my $dotmacroR = 'R';
 986     if ( $indot == 1 ) {
 987         $dotmacroL = 'M';
 988         $dotmacroR = 'S';
 989     }
 990     elsif ( $indot >= 2 ) {
 991         $dotmacroL = 'N';
 992         $dotmacroR = 'T';
 993     }
 994     if (!/""/) {
 995         s/(^|\s)(['"])/noremap("$1\\*($dotmacroL$2")/ge;
 996         s/(['"])($|[\-\s,;\\!?.])/noremap("\\*($dotmacroR$1$2")/ge;
 997     }
 998
 999     #s/(?!")(?:.)--(?!")(?:.)/\\*(--/g;
1000     #s/(?:(?!")(?:.)--(?:"))|(?:(?:")--(?!")(?:.))/\\*(--/g;
1001
1002
1003     # make sure that func() keeps a bit a space tween the parens
1004     ### s/\b\(\)/\\|()/g;
1005     ### s/\b\(\)/(\\|)/g;
1006
1007     # make C++ into \*C+, which is a squinched version (defined above)
1008     s/\bC\+\+/\\*(C+/g;
1009
1010     # make double underbars have a little tiny space between them
1011     s/__/_\\|_/g;
1012
1013     # PI goes to \*(PI (defined above)
1014     s/\bPI\b/noremap('\\*(PI')/ge;
1015
1016     # make all caps a teeny bit smaller, but don't muck with embedded code literals
1017     my $hidCFont = font('C');
1018     if ($Cmd !~ /^head1/) { # SH already makes smaller
1019         # /g isn't enough; 1 while or we'll be off
1020
1021 #       1 while s{
1022 #           (?!$hidCFont)(..|^.|^)
1023 #           \b
1024 #           (
1025 #               [A-Z][\/A-Z+:\-\d_$.]+
1026 #           )
1027 #           (s?)
1028 #           \b
1029 #       } {$1\\s-1$2\\s0}gmox;
1030
1031         1 while s{
1032             (?!$hidCFont)(..|^.|^)
1033             (
1034                 \b[A-Z]{2,}[\/A-Z+:\-\d_\$]*\b
1035             )
1036         } {
1037             $1 . noremap( '\\s-1' .  $2 . '\\s0' )
1038         }egmox;
1039
1040     }
1041 }
1042
1043 # make troff just be normal, but make small nroff get quoted
1044 # decided to just put the quotes in the text; sigh;
1045 sub ccvt {
1046     local($_,$prev) = @_;
1047     noremap(qq{.CQ "$_" \n\\&});
1048 }
1049
1050 sub makespace {
1051     if ($indent) {
1052         print ".Sp\n";
1053     }
1054     else {
1055         print ".PP\n";
1056     }
1057 }
1058
1059 sub mkindex {
1060     my ($entry) = @_;
1061     my @entries = split m:\s*/\s*:, $entry;
1062     push @Indices, ".IX Xref " . join ' ', map {qq("$_")} @entries;
1063     return '';
1064 }
1065
1066 sub font {
1067     local($font) = shift;
1068     return '\\f' . noremap($font);
1069 }
1070
1071 sub noremap {
1072     local($thing_to_hide) = shift;
1073     $thing_to_hide =~ tr/\000-\177/\200-\377/;
1074     return $thing_to_hide;
1075 }
1076
1077 sub init_noremap {
1078         # escape high bit characters in input stream
1079         s/([\200-\377])/"E<".ord($1).">"/ge;
1080 }
1081
1082 sub clear_noremap {
1083     my $ready_to_print = $_[0];
1084
1085     tr/\200-\377/\000-\177/;
1086
1087     # trofficate backslashes
1088     # s/(?!\\e)(?:..|^.|^)\\/\\e/g;
1089
1090     # now for the E<>s, which have been hidden until now
1091     # otherwise the interative \w<> processing would have
1092     # been hosed by the E<gt>
1093     s {
1094             E<
1095             (
1096                 ( \d + )
1097                 | ( [A-Za-z]+ )
1098             )
1099             >
1100     } {
1101          do {
1102              defined $2
1103                 ? chr($2)
1104                 :
1105              exists $HTML_Escapes{$3}
1106                 ? do { $HTML_Escapes{$3} }
1107                 : do {
1108                     warn "$0: Unknown escape in paragraph $. of $ARGV: ``$&''\n";
1109                     "E<$1>";
1110                 }
1111          }
1112     }egx if $ready_to_print;
1113 }
1114
1115 sub internal_lrefs {
1116     local($_) = shift;
1117     local $trailing_and = s/and\s+$// ? "and " : "";
1118
1119     s{L</([^>]+)>}{$1}g;
1120     my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
1121     my $retstr = "the ";
1122     my $i;
1123     for ($i = 0; $i <= $#items; $i++) {
1124         $retstr .= "C<$items[$i]>";
1125         $retstr .= ", " if @items > 2 && $i != $#items;
1126         $retstr .= " and " if $i+2 == @items;
1127     }
1128
1129     $retstr .= " entr" . ( @items > 1  ? "ies" : "y" )
1130             .  " elsewhere in this document";
1131     # terminal space to avoid words running together (pattern used
1132     # strips terminal spaces)
1133     $retstr .= " " if length $trailing_and;
1134     $retstr .=  $trailing_and;
1135
1136     return $retstr;
1137
1138 }
1139
1140 BEGIN {
1141 %HTML_Escapes = (
1142     'amp'       =>      '&',    #   ampersand
1143     'lt'        =>      '<',    #   left chevron, less-than
1144     'gt'        =>      '>',    #   right chevron, greater-than
1145     'quot'      =>      '"',    #   double quote
1146
1147     "Aacute"    =>      "A\\*'",        #   capital A, acute accent
1148     "aacute"    =>      "a\\*'",        #   small a, acute accent
1149     "Acirc"     =>      "A\\*^",        #   capital A, circumflex accent
1150     "acirc"     =>      "a\\*^",        #   small a, circumflex accent
1151     "AElig"     =>      '\*(AE',        #   capital AE diphthong (ligature)
1152     "aelig"     =>      '\*(ae',        #   small ae diphthong (ligature)
1153     "Agrave"    =>      "A\\*`",        #   capital A, grave accent
1154     "agrave"    =>      "A\\*`",        #   small a, grave accent
1155     "Aring"     =>      'A\\*o',        #   capital A, ring
1156     "aring"     =>      'a\\*o',        #   small a, ring
1157     "Atilde"    =>      'A\\*~',        #   capital A, tilde
1158     "atilde"    =>      'a\\*~',        #   small a, tilde
1159     "Auml"      =>      'A\\*:',        #   capital A, dieresis or umlaut mark
1160     "auml"      =>      'a\\*:',        #   small a, dieresis or umlaut mark
1161     "Ccedil"    =>      'C\\*,',        #   capital C, cedilla
1162     "ccedil"    =>      'c\\*,',        #   small c, cedilla
1163     "Eacute"    =>      "E\\*'",        #   capital E, acute accent
1164     "eacute"    =>      "e\\*'",        #   small e, acute accent
1165     "Ecirc"     =>      "E\\*^",        #   capital E, circumflex accent
1166     "ecirc"     =>      "e\\*^",        #   small e, circumflex accent
1167     "Egrave"    =>      "E\\*`",        #   capital E, grave accent
1168     "egrave"    =>      "e\\*`",        #   small e, grave accent
1169     "ETH"       =>      '\\*(D-',       #   capital Eth, Icelandic
1170     "eth"       =>      '\\*(d-',       #   small eth, Icelandic
1171     "Euml"      =>      "E\\*:",        #   capital E, dieresis or umlaut mark
1172     "euml"      =>      "e\\*:",        #   small e, dieresis or umlaut mark
1173     "Iacute"    =>      "I\\*'",        #   capital I, acute accent
1174     "iacute"    =>      "i\\*'",        #   small i, acute accent
1175     "Icirc"     =>      "I\\*^",        #   capital I, circumflex accent
1176     "icirc"     =>      "i\\*^",        #   small i, circumflex accent
1177     "Igrave"    =>      "I\\*`",        #   capital I, grave accent
1178     "igrave"    =>      "i\\*`",        #   small i, grave accent
1179     "Iuml"      =>      "I\\*:",        #   capital I, dieresis or umlaut mark
1180     "iuml"      =>      "i\\*:",        #   small i, dieresis or umlaut mark
1181     "Ntilde"    =>      'N\*~',         #   capital N, tilde
1182     "ntilde"    =>      'n\*~',         #   small n, tilde
1183     "Oacute"    =>      "O\\*'",        #   capital O, acute accent
1184     "oacute"    =>      "o\\*'",        #   small o, acute accent
1185     "Ocirc"     =>      "O\\*^",        #   capital O, circumflex accent
1186     "ocirc"     =>      "o\\*^",        #   small o, circumflex accent
1187     "Ograve"    =>      "O\\*`",        #   capital O, grave accent
1188     "ograve"    =>      "o\\*`",        #   small o, grave accent
1189     "Oslash"    =>      "O\\*/",        #   capital O, slash
1190     "oslash"    =>      "o\\*/",        #   small o, slash
1191     "Otilde"    =>      "O\\*~",        #   capital O, tilde
1192     "otilde"    =>      "o\\*~",        #   small o, tilde
1193     "Ouml"      =>      "O\\*:",        #   capital O, dieresis or umlaut mark
1194     "ouml"      =>      "o\\*:",        #   small o, dieresis or umlaut mark
1195     "szlig"     =>      '\*8',          #   small sharp s, German (sz ligature)
1196     "THORN"     =>      '\\*(Th',       #   capital THORN, Icelandic
1197     "thorn"     =>      '\\*(th',,      #   small thorn, Icelandic
1198     "Uacute"    =>      "U\\*'",        #   capital U, acute accent
1199     "uacute"    =>      "u\\*'",        #   small u, acute accent
1200     "Ucirc"     =>      "U\\*^",        #   capital U, circumflex accent
1201     "ucirc"     =>      "u\\*^",        #   small u, circumflex accent
1202     "Ugrave"    =>      "U\\*`",        #   capital U, grave accent
1203     "ugrave"    =>      "u\\*`",        #   small u, grave accent
1204     "Uuml"      =>      "U\\*:",        #   capital U, dieresis or umlaut mark
1205     "uuml"      =>      "u\\*:",        #   small u, dieresis or umlaut mark
1206     "Yacute"    =>      "Y\\*'",        #   capital Y, acute accent
1207     "yacute"    =>      "y\\*'",        #   small y, acute accent
1208     "yuml"      =>      "y\\*:",        #   small y, dieresis or umlaut mark
1209 );
1210 }
1211
1212 !NO!SUBS!
1213
1214 close OUT or die "Can't close $file: $!";
1215 chmod 0755, $file or die "Can't reset permissions for $file: $!\n";
1216 exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';
1217 chdir $origdir;