pod/pod2man.PL

   1 #!/usr/local/bin/perl
   2
   3 use Config;
   4 use File::Basename qw(&basename &dirname);
   5
   6 # List explicitly here the variables you want Configure to
   7 # generate.  Metaconfig only looks for shell variables, so you
   8 # have to mention them as if they were shell variables, not
   9 # %Config entries.  Thus you write
  10 #  $startperl
  11 #  $man3ext
  12 # to ensure Configure will look for $Config{startperl}.
  13
  14 $file = basename($0);
  15 $file =~ s/\.PL$//i;
  16 $file .= '.com' if $^O eq 'VMS';
  17
  18 chdir("pod") or die "Can't chdir to pod: $!";
  19 open OUT,">$file" or die "Can't create $file: $!";
  20
  21 print "Extracting $file (with variable substitutions)\n";
  22
  23 # In this section, perl variables will be expanded during extraction.
  24 # You can use $Config{...} to use Configure variables.
  25
  26 print OUT <<"!GROK!THIS!";
  27 $Config{startperl}
  28     eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}'
  29         if \$running_under_some_shell;
  30
  31 \$DEF_PM_SECTION = '$Config{man3ext}' || '3';
  32 !GROK!THIS!
  33
  34 # In the following, perl variables are not expanded during extraction.
  35
  36 print OUT <<'!NO!SUBS!';
  37
  38 =head1 NAME
  39
  40 pod2man - translate embedded Perl pod directives into man pages
  41
  42 =head1 SYNOPSIS
  43
  44 B<pod2man>
  45 [ B<--section=>I<manext> ]
  46 [ B<--release=>I<relpatch> ]
  47 [ B<--center=>I<string> ]
  48 [ B<--date=>I<string> ]
  49 [ B<--fixed=>I<font> ]
  50 [ B<--official> ]
  51 [ B<--lax> ]
  52 I<inputfile>
  53
  54 =head1 DESCRIPTION
  55
  56 B<pod2man> converts its input file containing embedded pod directives (see
  57 L<perlpod>) into nroff source suitable for viewing with nroff(1) or
  58 troff(1) using the man(7) macro set.
  59
  60 Besides the obvious pod conversions, B<pod2man> also takes care of
  61 func(), func(n), and simple variable references like $foo or @bar so
  62 you don't have to use code escapes for them; complex expressions like
  63 C<$fred{'stuff'}> will still need to be escaped, though.  Other nagging
  64 little roffish things that it catches include translating the minus in
  65 something like foo-bar, making a long dash--like this--into a real em
  66 dash, fixing up "paired quotes", putting a little space after the
  67 parens in something like func(), making C++ and PI look right, making
  68 double underbars have a little tiny space between them, making ALLCAPS
  69 a teeny bit smaller in troff(1), and escaping backslashes so you don't
  70 have to.
  71
  72 =head1 OPTIONS
  73
  74 =over 8
  75
  76 =item center
  77
  78 Set the centered header to a specific string.  The default is
  79 "User Contributed Perl Documentation", unless the C<--official> flag is
  80 given, in which case the default is "Perl Programmers Reference Guide".
  81
  82 =item date
  83
  84 Set the left-hand footer string to this value.  By default,
  85 the modification date of the input file will be used.
  86
  87 =item fixed
  88
  89 The fixed font to use for code refs.  Defaults to CW.
  90
  91 =item official
  92
  93 Set the default header to indicate that this page is of
  94 the standard release in case C<--center> is not given.
  95
  96 =item release
  97
  98 Set the centered footer.  By default, this is the current
  99 perl release.
 100
 101 =item section
 102
 103 Set the section for the C<.TH> macro.  The standard conventions on
 104 sections are to use 1 for user commands,  2 for system calls, 3 for
 105 functions, 4 for devices, 5 for file formats, 6 for games, 7 for
 106 miscellaneous information, and 8 for administrator commands.  This works
 107 best if you put your Perl man pages in a separate tree, like
 108 F</usr/local/perl/man/>.  By default, section 1 will be used
 109 unless the file ends in F<.pm> in which case section 3 will be selected.
 110
 111 =item lax
 112
 113 Don't complain when required sections aren't present.
 114
 115 =back
 116
 117 =head1 Anatomy of a Proper Man Page
 118
 119 For those not sure of the proper layout of a man page, here's
 120 an example of the skeleton of a proper man page.  Head of the
 121 major headers should be setout as a C<=head1> directive, and
 122 are historically written in the rather startling ALL UPPER CASE
 123 format, although this is not mandatory.
 124 Minor headers may be included using C<=head2>, and are
 125 typically in mixed case.
 126
 127 =over 10
 128
 129 =item NAME
 130
 131 Mandatory section; should be a comma-separated list of programs or
 132 functions documented by this podpage, such as:
 133
 134     foo, bar - programs to do something
 135
 136 =item SYNOPSIS
 137
 138 A short usage summary for programs and functions, which
 139 may someday be deemed mandatory.
 140
 141 =item DESCRIPTION
 142
 143 Long drawn out discussion of the program.  It's a good idea to break this
 144 up into subsections using the C<=head2> directives, like
 145
 146     =head2 A Sample Subection
 147
 148     =head2 Yet Another Sample Subection
 149
 150 =item OPTIONS
 151
 152 Some people make this separate from the description.
 153
 154 =item RETURN VALUE
 155
 156 What the program or function returns if successful.
 157
 158 =item ERRORS
 159
 160 Exceptions, return codes, exit stati, and errno settings.
 161
 162 =item EXAMPLES
 163
 164 Give some example uses of the program.
 165
 166 =item ENVIRONMENT
 167
 168 Envariables this program might care about.
 169
 170 =item FILES
 171
 172 All files used by the program.  You should probably use the FE<lt>E<gt>
 173 for these.
 174
 175 =item SEE ALSO
 176
 177 Other man pages to check out, like man(1), man(7), makewhatis(8), or catman(8).
 178
 179 =item NOTES
 180
 181 Miscellaneous commentary.
 182
 183 =item CAVEATS
 184
 185 Things to take special care with; sometimes called WARNINGS.
 186
 187 =item DIAGNOSTICS
 188
 189 All possible messages the program can print out--and
 190 what they mean.
 191
 192 =item BUGS
 193
 194 Things that are broken or just don't work quite right.
 195
 196 =item RESTRICTIONS
 197
 198 Bugs you don't plan to fix :-)
 199
 200 =item AUTHOR
 201
 202 Who wrote it (or AUTHORS if multiple).
 203
 204 =item HISTORY
 205
 206 Programs derived from other sources sometimes have this, or
 207 you might keep a modification log here.
 208
 209 =back
 210
 211 =head1 EXAMPLES
 212
 213     pod2man program > program.1
 214     pod2man some_module.pm > /usr/perl/man/man3/some_module.3
 215     pod2man --section=7 note.pod > note.7
 216
 217 =head1 DIAGNOSTICS
 218
 219 The following diagnostics are generated by B<pod2man>.  Items
 220 marked "(W)" are non-fatal, whereas the "(F)" errors will cause
 221 B<pod2man> to immediately exit with a non-zero status.
 222
 223 =over 4
 224
 225 =item bad option in paragraph %d of %s: ``%s'' should be [%s]<%s>
 226
 227 (W) If you start include an option, you should set it off
 228 as bold, italic, or code.
 229
 230 =item can't open %s: %s
 231
 232 (F) The input file wasn't available for the given reason.
 233
 234 =item Improper man page - no dash in NAME header in paragraph %d of %s
 235
 236 (W) The NAME header did not have an isolated dash in it.  This is
 237 considered important.
 238
 239 =item Invalid man page - no NAME line in %s
 240
 241 (F) You did not include a NAME header, which is essential.
 242
 243 =item roff font should be 1 or 2 chars, not `%s'  (F)
 244
 245 (F) The font specified with the C<--fixed> option was not
 246 a one- or two-digit roff font.
 247
 248 =item %s is missing required section: %s
 249
 250 (W) Required sections include NAME, DESCRIPTION, and if you're
 251 using a section starting with a 3, also a SYNOPSIS.  Actually,
 252 not having a NAME is a fatal.
 253
 254 =item Unknown escape: %s in %s
 255
 256 (W) An unknown HTML entity (probably for an 8-bit character) was given via
 257 a C<EE<lt>E<gt>> directive.  Besides amp, lt, gt, and quot, recognized
 258 entities are Aacute, aacute, Acirc, acirc, AElig, aelig, Agrave, agrave,
 259 Aring, aring, Atilde, atilde, Auml, auml, Ccedil, ccedil, Eacute, eacute,
 260 Ecirc, ecirc, Egrave, egrave, ETH, eth, Euml, euml, Iacute, iacute, Icirc,
 261 icirc, Igrave, igrave, Iuml, iuml, Ntilde, ntilde, Oacute, oacute, Ocirc,
 262 ocirc, Ograve, ograve, Oslash, oslash, Otilde, otilde, Ouml, ouml, szlig,
 263 THORN, thorn, Uacute, uacute, Ucirc, ucirc, Ugrave, ugrave, Uuml, uuml,
 264 Yacute, yacute, and yuml.
 265
 266 =item Unmatched =back
 267
 268 (W) You have a C<=back> without a corresponding C<=over>.
 269
 270 =item Unrecognized pod directive: %s
 271
 272 (W) You specified a pod directive that isn't in the known list of
 273 C<=head1>, C<=head2>, C<=item>, C<=over>, C<=back>, or C<=cut>.
 274
 275
 276 =back
 277
 278 =head1 NOTES
 279
 280 If you would like to print out a lot of man page continuously, you
 281 probably want to set the C and D registers to set contiguous page
 282 numbering and even/odd paging, at least on some versions of man(7).
 283 Settting the F register will get you some additional experimental
 284 indexing:
 285
 286     troff -man -rC1 -rD1 -rF1 perl.1 perldata.1 perlsyn.1 ...
 287
 288 The indexing merely outputs messages via C<.tm> for each
 289 major page, section, subsection, item, and any C<XE<lt>E<gt>>
 290 directives.
 291
 292
 293 =head1 RESTRICTIONS
 294
 295 None at this time.
 296
 297 =head1 BUGS
 298
 299 The =over and =back directives don't really work right.  They
 300 take absolute positions instead of offsets, don't nest well, and
 301 making people count is suboptimal in any event.
 302
 303 =head1 AUTHORS
 304
 305 Original prototype by Larry Wall, but so massively hacked over by
 306 Tom Christiansen such that Larry probably doesn't recognize it anymore.
 307
 308 =cut
 309
 310 $/ = "";
 311 $cutting = 1;
 312 @Indices = ();
 313
 314 # We try first to get the version number from a local binary, in case we're
 315 # running an installed version of Perl to produce documentation from an
 316 # uninstalled newer version's pod files.
 317 if ($^O ne 'plan9' and $^O ne 'dos' and $^O ne 'os2' and $^O ne 'MSWin32') {
 318   my $perl = (-x './perl' && -f './perl' ) ?
 319                  './perl' :
 320                  ((-x '../perl' && -f '../perl') ?
 321                       '../perl' :
 322                       '');
 323   ($version,$patch) = `$perl -e 'print $]'` =~ /^(\d\.\d{3})(\d{2})?/ if $perl;
 324 }
 325 # No luck; we'll just go with the running Perl's version
 326 ($version,$patch) = $] =~ /^(.{5})(\d{2})?/ unless $version;
 327 $DEF_RELEASE  = "perl $version";
 328 $DEF_RELEASE .= ", patch $patch" if $patch;
 329
 330
 331 sub makedate {
 332     my $secs = shift;
 333     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($secs);
 334     my $mname = (qw{Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec})[$mon];
 335     $year += 1900;
 336     return "$mday/$mname/$year";
 337 }
 338
 339 use Getopt::Long;
 340
 341 $DEF_SECTION = 1;
 342 $DEF_CENTER = "User Contributed Perl Documentation";
 343 $STD_CENTER = "Perl Programmers Reference Guide";
 344 $DEF_FIXED = 'CW';
 345 $DEF_LAX = 0;
 346
 347 sub usage {
 348     warn "$0: @_\n" if @_;
 349     die <<EOF;
 350 usage: $0 [options] podpage
 351 Options are:
 352         --section=manext      (default "$DEF_SECTION")
 353         --release=relpatch    (default "$DEF_RELEASE")
 354         --center=string       (default "$DEF_CENTER")
 355         --date=string         (default "$DEF_DATE")
 356         --fixed=font          (default "$DEF_FIXED")
 357         --official            (default NOT)
 358         --lax                 (default NOT)
 359 EOF
 360 }
 361
 362 $uok = GetOptions( qw(
 363         section=s
 364         release=s
 365         center=s
 366         date=s
 367         fixed=s
 368         official
 369         lax
 370         help));
 371
 372 $DEF_DATE = makedate((stat($ARGV[0]))[9] || time());
 373
 374 usage("Usage error!") unless $uok;
 375 usage() if $opt_help;
 376 usage("Need one and only one podpage argument") unless @ARGV == 1;
 377
 378 $section = $opt_section || ($ARGV[0] =~ /\.pm$/
 379                                 ? $DEF_PM_SECTION : $DEF_SECTION);
 380 $RP = $opt_release || $DEF_RELEASE;
 381 $center = $opt_center || ($opt_official ? $STD_CENTER : $DEF_CENTER);
 382 $lax = $opt_lax || $DEF_LAX;
 383
 384 $CFont = $opt_fixed || $DEF_FIXED;
 385
 386 if (length($CFont) == 2) {
 387     $CFont_embed = "\\f($CFont";
 388 }
 389 elsif (length($CFont) == 1) {
 390     $CFont_embed = "\\f$CFont";
 391 }
 392 else {
 393     die "roff font should be 1 or 2 chars, not `$CFont_embed'";
 394 }
 395
 396 $date = $opt_date || $DEF_DATE;
 397
 398 for (qw{NAME DESCRIPTION}) {
 399 # for (qw{NAME DESCRIPTION AUTHOR}) {
 400     $wanna_see{$_}++;
 401 }
 402 $wanna_see{SYNOPSIS}++ if $section =~ /^3/;
 403
 404
 405 $name = @ARGV ? $ARGV[0] : "<STDIN>";
 406 $Filename = $name;
 407 if ($section =~ /^1/) {
 408     require File::Basename;
 409     $name = uc File::Basename::basename($name);
 410 }
 411 $name =~ s/\.(pod|p[lm])$//i;
 412
 413 # Lose everything up to the first of
 414 #     */lib/*perl*      standard or site_perl module
 415 #     */*perl*/lib      from -D prefix=/opt/perl
 416 #     */*perl*/         random module hierarchy
 417 # which works.
 418 $name =~ s-//+-/-g;
 419 if ($name =~ s-^.*?/lib/[^/]*perl[^/]*/--i
 420         or $name =~ s-^.*?/[^/]*perl[^/]*/lib/--i
 421         or $name =~ s-^.*?/[^/]*perl[^/]*/--i) {
 422     # Lose ^site(_perl)?/.
 423     $name =~ s-^site(_perl)?/--;
 424     # Lose ^arch/.      (XXX should we use Config? Just for archname?)
 425     $name =~ s~^(.*-$^O|$^O-.*)/~~o;
 426     # Lose ^version/.
 427     $name =~ s-^\d+\.\d+/--;
 428 }
 429
 430 # Translate Getopt/Long to Getopt::Long, etc.
 431 $name =~ s(/)(::)g;
 432
 433 if ($name ne 'something') {
 434     FCHECK: {
 435         open(F, "< $ARGV[0]") || die "can't open $ARGV[0]: $!";
 436         while (<F>) {
 437             next unless /^=\b/;
 438             if (/^=head1\s+NAME\s*$/) {  # an /m would forgive mistakes
 439                 $_ = <F>;
 440                 unless (/\s*-+\s+/) {
 441                     $oops++;
 442                     warn "$0: Improper man page - no dash in NAME header in paragraph $. of $ARGV[0]\n"
 443                 } else {
 444                     my @n = split /\s+-+\s+/;
 445                     if (@n != 2) {
 446                         $oops++;
 447                         warn "$0: Improper man page - malformed NAME header in paragraph $. of $ARGV[0]\n"
 448                     }
 449                     else {
 450                         %namedesc = @n;
 451                     }
 452                 }
 453                 last FCHECK;
 454             }
 455             next if /^=cut\b/;  # DB_File and Net::Ping have =cut before NAME
 456             next if /^=pod\b/;  # It is OK to have =pod before NAME
 457             die "$0: Invalid man page - 1st pod line is not NAME in $ARGV[0]\n" unless $lax;
 458         }
 459         die "$0: Invalid man page - no documentation in $ARGV[0]\n" unless $lax;
 460     }
 461     close F;
 462 }
 463
 464 print <<"END";
 465 .rn '' }`
 466 ''' \$RCSfile\$\$Revision\$\$Date\$
 467 '''
 468 ''' \$Log\$
 469 '''
 470 .de Sh
 471 .br
 472 .if t .Sp
 473 .ne 5
 474 .PP
 475 \\fB\\\\\$1\\fR
 476 .PP
 477 ..
 478 .de Sp
 479 .if t .sp .5v
 480 .if n .sp
 481 ..
 482 .de Ip
 483 .br
 484 .ie \\\\n(.\$>=3 .ne \\\\\$3
 485 .el .ne 3
 486 .IP "\\\\\$1" \\\\\$2
 487 ..
 488 .de Vb
 489 .ft $CFont
 490 .nf
 491 .ne \\\\\$1
 492 ..
 493 .de Ve
 494 .ft R
 495
 496 .fi
 497 ..
 498 '''
 499 '''
 500 '''     Set up \\*(-- to give an unbreakable dash;
 501 '''     string Tr holds user defined translation string.
 502 '''     Bell System Logo is used as a dummy character.
 503 '''
 504 .tr \\(*W-|\\(bv\\*(Tr
 505 .ie n \\{\\
 506 .ds -- \\(*W-
 507 .ds PI pi
 508 .if (\\n(.H=4u)&(1m=24u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-12u'-\\" diablo 10 pitch
 509 .if (\\n(.H=4u)&(1m=20u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-8u'-\\" diablo 12 pitch
 510 .ds L" ""
 511 .ds R" ""
 512 '''   \\*(M", \\*(S", \\*(N" and \\*(T" are the equivalent of
 513 '''   \\*(L" and \\*(R", except that they are used on ".xx" lines,
 514 '''   such as .IP and .SH, which do another additional levels of
 515 '''   double-quote interpretation
 516 .ds M" """
 517 .ds S" """
 518 .ds N" """""
 519 .ds T" """""
 520 .ds L' '
 521 .ds R' '
 522 .ds M' '
 523 .ds S' '
 524 .ds N' '
 525 .ds T' '
 526 'br\\}
 527 .el\\{\\
 528 .ds -- \\(em\\|
 529 .tr \\*(Tr
 530 .ds L" ``
 531 .ds R" ''
 532 .ds M" ``
 533 .ds S" ''
 534 .ds N" ``
 535 .ds T" ''
 536 .ds L' `
 537 .ds R' '
 538 .ds M' `
 539 .ds S' '
 540 .ds N' `
 541 .ds T' '
 542 .ds PI \\(*p
 543 'br\\}
 544 END
 545
 546 print <<'END';
 547 .\"     If the F register is turned on, we'll generate
 548 .\"     index entries out stderr for the following things:
 549 .\"             TH      Title
 550 .\"             SH      Header
 551 .\"             Sh      Subsection
 552 .\"             Ip      Item
 553 .\"             X<>     Xref  (embedded
 554 .\"     Of course, you have to process the output yourself
 555 .\"     in some meaninful fashion.
 556 .if \nF \{
 557 .de IX
 558 .tm Index:\\$1\t\\n%\t"\\$2"
 559 ..
 560 .nr % 0
 561 .rr F
 562 .\}
 563 END
 564
 565 print <<"END";
 566 .TH $name $section "$date" "$RP" "$center"
 567 .UC
 568 END
 569
 570 push(@Indices, qq{.IX Title "$name $section"});
 571
 572 while (($name, $desc) = each %namedesc) {
 573     for ($name, $desc) { s/^\s+//; s/\s+$//; }
 574     push(@Indices, qq(.IX Name "$name - $desc"\n));
 575 }
 576
 577 print <<'END';
 578 .if n .hy 0
 579 .if n .na
 580 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
 581 .de CQ          \" put $1 in typewriter font
 582 END
 583 print ".ft $CFont\n";
 584 print <<'END';
 585 'if n "\c
 586 'if t \\&\\$1\c
 587 'if n \\&\\$1\c
 588 'if n \&"
 589 \\&\\$2 \\$3 \\$4 \\$5 \\$6 \\$7
 590 '.ft R
 591 ..
 592 .\" @(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2
 593 .       \" AM - accent mark definitions
 594 .bd B 3
 595 .       \" fudge factors for nroff and troff
 596 .if n \{\
 597 .       ds #H 0
 598 .       ds #V .8m
 599 .       ds #F .3m
 600 .       ds #[ \f1
 601 .       ds #] \fP
 602 .\}
 603 .if t \{\
 604 .       ds #H ((1u-(\\\\n(.fu%2u))*.13m)
 605 .       ds #V .6m
 606 .       ds #F 0
 607 .       ds #[ \&
 608 .       ds #] \&
 609 .\}
 610 .       \" simple accents for nroff and troff
 611 .if n \{\
 612 .       ds ' \&
 613 .       ds ` \&
 614 .       ds ^ \&
 615 .       ds , \&
 616 .       ds ~ ~
 617 .       ds ? ?
 618 .       ds ! !
 619 .       ds /
 620 .       ds q
 621 .\}
 622 .if t \{\
 623 .       ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
 624 .       ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
 625 .       ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
 626 .       ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
 627 .       ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
 628 .       ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
 629 .       ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
 630 .       ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
 631 .       ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
 632 .\}
 633 .       \" troff and (daisy-wheel) nroff accents
 634 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
 635 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
 636 .ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
 637 .ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
 638 .ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
 639 .ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
 640 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
 641 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
 642 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
 643 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
 644 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
 645 .ds ae a\h'-(\w'a'u*4/10)'e
 646 .ds Ae A\h'-(\w'A'u*4/10)'E
 647 .ds oe o\h'-(\w'o'u*4/10)'e
 648 .ds Oe O\h'-(\w'O'u*4/10)'E
 649 .       \" corrections for vroff
 650 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
 651 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
 652 .       \" for low resolution devices (crt and lpr)
 653 .if \n(.H>23 .if \n(.V>19 \
 654 \{\
 655 .       ds : e
 656 .       ds 8 ss
 657 .       ds v \h'-1'\o'\(aa\(ga'
 658 .       ds _ \h'-1'^
 659 .       ds . \h'-1'.
 660 .       ds 3 3
 661 .       ds o a
 662 .       ds d- d\h'-1'\(ga
 663 .       ds D- D\h'-1'\(hy
 664 .       ds th \o'bp'
 665 .       ds Th \o'LP'
 666 .       ds ae ae
 667 .       ds Ae AE
 668 .       ds oe oe
 669 .       ds Oe OE
 670 .\}
 671 .rm #[ #] #H #V #F C
 672 END
 673
 674 $indent = 0;
 675
 676 $begun = "";
 677
 678 # Unrolling [^-=A-Z>]|[A-Z](?!<)|[-=](?![A-Z]<)[\x00-\xFF] gives: // MRE pp 165.
 679 my $nonest = q{(?x)             # Turn on /x mode.
 680               (?:               # Group
 681                 [^-=A-Z>]*      # Anything that isn't a dash, equal sign or
 682                                 # closing hook isn't special. Eat as much as
 683                                 # we can.
 684                 (?:             # Group.
 685                   (?:           # Group.
 686                     [-=]        # We want to recognize -> and =>.
 687                     (?![A-Z]<)  # So, as long as it isn't followed by markup
 688                     [\x00-\xFF] # anything may follow - and =
 689                     |
 690                     [A-Z]       # Capitals are fine too,
 691                     (?!<)       # But not if they start markup.
 692                   )             # End of special sequences.
 693                   [^-=A-Z>]*    # Followed by zero or more non-special chars.
 694                 )*              # And we can repeat this as often as we can.
 695               )};               # That's all folks.
 696
 697 while (<>) {
 698     if ($cutting) {
 699         next unless /^=/;
 700         $cutting = 0;
 701     }
 702     if ($begun) {
 703         if (/^=end\s+$begun/) {
 704             $begun = "";
 705         }
 706         elsif ($begun =~ /^(roff|man)$/) {
 707             print STDOUT $_;
 708         }
 709         next;
 710     }
 711     chomp;
 712
 713     # Translate verbatim paragraph
 714
 715     if (/^\s/) {
 716         @lines = split(/\n/);
 717         for (@lines) {
 718             1 while s
 719                 {^( [^\t]* ) \t ( \t* ) }
 720                 { $1 . ' ' x (8 - (length($1)%8) + 8 * (length($2))) }ex;
 721             s/\\/\\e/g;
 722             s/\A/\\&/s;
 723         }
 724         $lines = @lines;
 725         makespace() unless $verbatim++;
 726         print ".Vb $lines\n";
 727         print join("\n", @lines), "\n";
 728         print ".Ve\n";
 729         $needspace = 0;
 730         next;
 731     }
 732
 733     $verbatim = 0;
 734
 735     if (/^=for\s+(\S+)\s*/s) {
 736         if ($1 eq "man" or $1 eq "roff") {
 737             print STDOUT $',"\n\n";
 738         } else {
 739             # ignore unknown for
 740         }
 741         next;
 742     }
 743     elsif (/^=begin\s+(\S+)\s*/s) {
 744         $begun = $1;
 745         if ($1 eq "man" or $1 eq "roff") {
 746             print STDOUT $'."\n\n";
 747         }
 748         next;
 749     }
 750
 751     # check for things that'll hosed our noremap scheme; affects $_
 752     init_noremap();
 753
 754     if (!/^=item/) {
 755
 756         # trofficate backslashes; must do it before what happens below
 757         s/\\/noremap('\\e')/ge;
 758
 759         # protect leading periods and quotes against *roff
 760         # mistaking them for directives
 761         s/^(?:[A-Z]<)?[.']/\\&$&/gm;
 762
 763         # first hide the escapes in case we need to
 764         # intuit something and get it wrong due to fmting
 765
 766         1 while s/([A-Z]<$nonest>)/noremap($1)/ge;
 767
 768         # func() is a reference to a perl function
 769         s{
 770             \b
 771             (
 772                 [:\w]+ \(\)
 773             )
 774         } {I<$1>}gx;
 775
 776         # func(n) is a reference to a perl function or a man page
 777         s{
 778             ([:\w]+)
 779             (
 780                 \( [^\051]+ \)
 781             )
 782         } {I<$1>\\|$2}gx;
 783
 784         # convert simple variable references
 785         s/(\s+)([\$\@%&*][\w:]+)(?!\()/${1}C<$2>/g;
 786
 787         if (m{ (
 788                     [\-\w]+
 789                     \(
 790                         [^\051]*?
 791                         [\@\$,]
 792                         [^\051]*?
 793                     \)
 794                 )
 795             }x && $` !~ /([LCI]<[^<>]*|-)$/ && !/^=\w/)
 796         {
 797             warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [LCI]<$1>\n";
 798             $oops++;
 799         }
 800
 801         while (/(-[a-zA-Z])\b/g && $` !~ /[\w\-]$/) {
 802             warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [CB]<$1>\n";
 803             $oops++;
 804         }
 805
 806         # put it back so we get the <> processed again;
 807         clear_noremap(0); # 0 means leave the E's
 808
 809     } else {
 810         # trofficate backslashes
 811         s/\\/noremap('\\e')/ge;
 812
 813     }
 814
 815     # need to hide E<> first; they're processed in clear_noremap
 816     s/(E<[^<>]+>)/noremap($1)/ge;
 817
 818
 819     $maxnest = 10;
 820     while ($maxnest-- && /[A-Z]</) {
 821
 822         # can't do C font here
 823         s/([BI])<($nonest)>/font($1) . $2 . font('R')/eg;
 824
 825         # files and filelike refs in italics
 826         s/F<($nonest)>/I<$1>/g;
 827
 828         # no break -- usually we want C<> for this
 829         s/S<($nonest)>/nobreak($1)/eg;
 830
 831         # LREF: a la HREF L<show this text|man/section>
 832         s:L<([^|>]+)\|[^>]+>:$1:g;
 833
 834         # LREF: a manpage(3f)
 835         s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the I<$1>$2 manpage:g;
 836
 837         # LREF: an =item on another manpage
 838         s{
 839             L<
 840                 ([^/]+)
 841                 /
 842                 (
 843                     [:\w]+
 844                     (\(\))?
 845                 )
 846             >
 847         } {the C<$2> entry in the I<$1> manpage}gx;
 848
 849         # LREF: an =item on this manpage
 850         s{
 851            ((?:
 852             L<
 853                 /
 854                 (
 855                     [:\w]+
 856                     (\(\))?
 857                 )
 858             >
 859             (,?\s+(and\s+)?)?
 860           )+)
 861         } { internal_lrefs($1) }gex;
 862
 863         # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
 864         # the "func" can disambiguate
 865         s{
 866             L<
 867                 (?:
 868                     ([a-zA-Z]\S+?) /
 869                 )?
 870                 "?(.*?)"?
 871             >
 872         }{
 873             do {
 874                 $1      # if no $1, assume it means on this page.
 875                     ?  "the section on I<$2> in the I<$1> manpage"
 876                     :  "the section on I<$2>"
 877             }
 878         }gesx; # s in case it goes over multiple lines, so . matches \n
 879
 880         s/Z<>/\\&/g;
 881
 882         # comes last because not subject to reprocessing
 883         s/C<($nonest)>/noremap("${CFont_embed}${1}\\fR")/eg;
 884     }
 885
 886     if (s/^=//) {
 887         $needspace = 0;         # Assume this.
 888
 889         s/\n/ /g;
 890
 891         ($Cmd, $_) = split(' ', $_, 2);
 892
 893         $dotlevel = 1;
 894         if ($Cmd eq 'head1') {
 895            $dotlevel = 1;
 896         }
 897         elsif ($Cmd eq 'head2') {
 898            $dotlevel = 1;
 899         }
 900         elsif ($Cmd eq 'item') {
 901            $dotlevel = 2;
 902         }
 903
 904         if (defined $_) {
 905             &escapes($dotlevel);
 906             s/"/""/g;
 907         }
 908
 909         clear_noremap(1);
 910
 911         if ($Cmd eq 'cut') {
 912             $cutting = 1;
 913         }
 914         elsif ($Cmd eq 'head1') {
 915             s/\s+$//;
 916             delete $wanna_see{$_} if exists $wanna_see{$_};
 917             print qq{.SH "$_"\n};
 918       push(@Indices, qq{.IX Header "$_"\n});
 919         }
 920         elsif ($Cmd eq 'head2') {
 921             print qq{.Sh "$_"\n};
 922       push(@Indices, qq{.IX Subsection "$_"\n});
 923         }
 924         elsif ($Cmd eq 'over') {
 925             push(@indent,$indent);
 926             $indent += ($_ + 0) || 5;
 927         }
 928         elsif ($Cmd eq 'back') {
 929             $indent = pop(@indent);
 930             warn "$0: Unmatched =back in paragraph $. of $ARGV\n" unless defined $indent;
 931             $needspace = 1;
 932         }
 933         elsif ($Cmd eq 'item') {
 934             s/^\*( |$)/\\(bu$1/g;
 935             # if you know how to get ":s please do
 936             s/\\\*\(L"([^"]+?)\\\*\(R"/'$1'/g;
 937             s/\\\*\(L"([^"]+?)""/'$1'/g;
 938             s/[^"]""([^"]+?)""[^"]/'$1'/g;
 939             # here do something about the $" in perlvar?
 940             print STDOUT qq{.Ip "$_" $indent\n};
 941       push(@Indices, qq{.IX Item "$_"\n});
 942         }
 943         elsif ($Cmd eq 'pod') {
 944             # this is just a comment
 945         }
 946         else {
 947             warn "$0: Unrecognized pod directive in paragraph $. of $ARGV: $Cmd\n";
 948         }
 949     }
 950     else {
 951         if ($needspace) {
 952             &makespace;
 953         }
 954         &escapes(0);
 955         clear_noremap(1);
 956         print $_, "\n";
 957         $needspace = 1;
 958     }
 959 }
 960
 961 print <<"END";
 962
 963 .rn }` ''
 964 END
 965
 966 if (%wanna_see && !$lax) {
 967     @missing = keys %wanna_see;
 968     warn "$0: $Filename is missing required section"
 969         .  (@missing > 1 && "s")
 970         .  ": @missing\n";
 971     $oops++;
 972 }
 973
 974 foreach (@Indices) { print "$_\n"; }
 975
 976 exit;
 977 #exit ($oops != 0);
 978
 979 #########################################################################
 980
 981 sub nobreak {
 982     my $string = shift;
 983     $string =~ s/ /\\ /g;
 984     $string;
 985 }
 986
 987 sub escapes {
 988     my $indot = shift;
 989
 990     s/X<(.*?)>/mkindex($1)/ge;
 991
 992     # translate the minus in foo-bar into foo\-bar for roff
 993     s/([^0-9a-z-])-([^-])/$1\\-$2/g;
 994
 995     # make -- into the string version \*(-- (defined above)
 996     s/\b--\b/\\*(--/g;
 997     s/"--([^"])/"\\*(--$1/g;  # should be a better way
 998     s/([^"])--"/$1\\*(--"/g;
 999
1000     # fix up quotes; this is somewhat tricky
1001     my $dotmacroL = 'L';
1002     my $dotmacroR = 'R';
1003     if ( $indot == 1 ) {
1004         $dotmacroL = 'M';
1005         $dotmacroR = 'S';
1006     }
1007     elsif ( $indot >= 2 ) {
1008         $dotmacroL = 'N';
1009         $dotmacroR = 'T';
1010     }
1011     if (!/""/) {
1012         s/(^|\s)(['"])/noremap("$1\\*($dotmacroL$2")/ge;
1013         s/(['"])($|[\-\s,;\\!?.])/noremap("\\*($dotmacroR$1$2")/ge;
1014     }
1015
1016     #s/(?!")(?:.)--(?!")(?:.)/\\*(--/g;
1017     #s/(?:(?!")(?:.)--(?:"))|(?:(?:")--(?!")(?:.))/\\*(--/g;
1018
1019
1020     # make sure that func() keeps a bit a space tween the parens
1021     ### s/\b\(\)/\\|()/g;
1022     ### s/\b\(\)/(\\|)/g;
1023
1024     # make C++ into \*C+, which is a squinched version (defined above)
1025     s/\bC\+\+/\\*(C+/g;
1026
1027     # make double underbars have a little tiny space between them
1028     s/__/_\\|_/g;
1029
1030     # PI goes to \*(PI (defined above)
1031     s/\bPI\b/noremap('\\*(PI')/ge;
1032
1033     # make all caps a teeny bit smaller, but don't muck with embedded code literals
1034     my $hidCFont = font('C');
1035     if ($Cmd !~ /^head1/) { # SH already makes smaller
1036         # /g isn't enough; 1 while or we'll be off
1037
1038 #       1 while s{
1039 #           (?!$hidCFont)(..|^.|^)
1040 #           \b
1041 #           (
1042 #               [A-Z][\/A-Z+:\-\d_$.]+
1043 #           )
1044 #           (s?)
1045 #           \b
1046 #       } {$1\\s-1$2\\s0}gmox;
1047
1048         1 while s{
1049             (?!$hidCFont)(..|^.|^)
1050             (
1051                 \b[A-Z]{2,}[\/A-Z+:\-\d_\$]*\b
1052             )
1053         } {
1054             $1 . noremap( '\\s-1' .  $2 . '\\s0' )
1055         }egmox;
1056
1057     }
1058 }
1059
1060 # make troff just be normal, but make small nroff get quoted
1061 # decided to just put the quotes in the text; sigh;
1062 sub ccvt {
1063     local($_,$prev) = @_;
1064     noremap(qq{.CQ "$_" \n\\&});
1065 }
1066
1067 sub makespace {
1068     if ($indent) {
1069         print ".Sp\n";
1070     }
1071     else {
1072         print ".PP\n";
1073     }
1074 }
1075
1076 sub mkindex {
1077     my ($entry) = @_;
1078     my @entries = split m:\s*/\s*:, $entry;
1079     push @Indices, ".IX Xref " . join ' ', map {qq("$_")} @entries;
1080     return '';
1081 }
1082
1083 sub font {
1084     local($font) = shift;
1085     return '\\f' . noremap($font);
1086 }
1087
1088 sub noremap {
1089     local($thing_to_hide) = shift;
1090     $thing_to_hide =~ tr/\000-\177/\200-\377/;
1091     return $thing_to_hide;
1092 }
1093
1094 sub init_noremap {
1095         # escape high bit characters in input stream
1096         s/([\200-\377])/"E<".ord($1).">"/ge;
1097 }
1098
1099 sub clear_noremap {
1100     my $ready_to_print = $_[0];
1101
1102     tr/\200-\377/\000-\177/;
1103
1104     # trofficate backslashes
1105     # s/(?!\\e)(?:..|^.|^)\\/\\e/g;
1106
1107     # now for the E<>s, which have been hidden until now
1108     # otherwise the interative \w<> processing would have
1109     # been hosed by the E<gt>
1110     s {
1111             E<
1112             (
1113                 ( \d + )
1114                 | ( [A-Za-z]+ )
1115             )
1116             >
1117     } {
1118          do {
1119              defined $2
1120                 ? chr($2)
1121                 :
1122              exists $HTML_Escapes{$3}
1123                 ? do { $HTML_Escapes{$3} }
1124                 : do {
1125                     warn "$0: Unknown escape in paragraph $. of $ARGV: ``$&''\n";
1126                     "E<$1>";
1127                 }
1128          }
1129     }egx if $ready_to_print;
1130 }
1131
1132 sub internal_lrefs {
1133     local($_) = shift;
1134     local $trailing_and = s/and\s+$// ? "and " : "";
1135
1136     s{L</([^>]+)>}{$1}g;
1137     my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
1138     my $retstr = "the ";
1139     my $i;
1140     for ($i = 0; $i <= $#items; $i++) {
1141         $retstr .= "C<$items[$i]>";
1142         $retstr .= ", " if @items > 2 && $i != $#items;
1143         $retstr .= " and " if $i+2 == @items;
1144     }
1145
1146     $retstr .= " entr" . ( @items > 1  ? "ies" : "y" )
1147             .  " elsewhere in this document";
1148     # terminal space to avoid words running together (pattern used
1149     # strips terminal spaces)
1150     $retstr .= " " if length $trailing_and;
1151     $retstr .=  $trailing_and;
1152
1153     return $retstr;
1154
1155 }
1156
1157 BEGIN {
1158 %HTML_Escapes = (
1159     'amp'       =>      '&',    #   ampersand
1160     'lt'        =>      '<',    #   left chevron, less-than
1161     'gt'        =>      '>',    #   right chevron, greater-than
1162     'quot'      =>      '"',    #   double quote
1163
1164     "Aacute"    =>      "A\\*'",        #   capital A, acute accent
1165     "aacute"    =>      "a\\*'",        #   small a, acute accent
1166     "Acirc"     =>      "A\\*^",        #   capital A, circumflex accent
1167     "acirc"     =>      "a\\*^",        #   small a, circumflex accent
1168     "AElig"     =>      '\*(AE',        #   capital AE diphthong (ligature)
1169     "aelig"     =>      '\*(ae',        #   small ae diphthong (ligature)
1170     "Agrave"    =>      "A\\*`",        #   capital A, grave accent
1171     "agrave"    =>      "A\\*`",        #   small a, grave accent
1172     "Aring"     =>      'A\\*o',        #   capital A, ring
1173     "aring"     =>      'a\\*o',        #   small a, ring
1174     "Atilde"    =>      'A\\*~',        #   capital A, tilde
1175     "atilde"    =>      'a\\*~',        #   small a, tilde
1176     "Auml"      =>      'A\\*:',        #   capital A, dieresis or umlaut mark
1177     "auml"      =>      'a\\*:',        #   small a, dieresis or umlaut mark
1178     "Ccedil"    =>      'C\\*,',        #   capital C, cedilla
1179     "ccedil"    =>      'c\\*,',        #   small c, cedilla
1180     "Eacute"    =>      "E\\*'",        #   capital E, acute accent
1181     "eacute"    =>      "e\\*'",        #   small e, acute accent
1182     "Ecirc"     =>      "E\\*^",        #   capital E, circumflex accent
1183     "ecirc"     =>      "e\\*^",        #   small e, circumflex accent
1184     "Egrave"    =>      "E\\*`",        #   capital E, grave accent
1185     "egrave"    =>      "e\\*`",        #   small e, grave accent
1186     "ETH"       =>      '\\*(D-',       #   capital Eth, Icelandic
1187     "eth"       =>      '\\*(d-',       #   small eth, Icelandic
1188     "Euml"      =>      "E\\*:",        #   capital E, dieresis or umlaut mark
1189     "euml"      =>      "e\\*:",        #   small e, dieresis or umlaut mark
1190     "Iacute"    =>      "I\\*'",        #   capital I, acute accent
1191     "iacute"    =>      "i\\*'",        #   small i, acute accent
1192     "Icirc"     =>      "I\\*^",        #   capital I, circumflex accent
1193     "icirc"     =>      "i\\*^",        #   small i, circumflex accent
1194     "Igrave"    =>      "I\\*`",        #   capital I, grave accent
1195     "igrave"    =>      "i\\*`",        #   small i, grave accent
1196     "Iuml"      =>      "I\\*:",        #   capital I, dieresis or umlaut mark
1197     "iuml"      =>      "i\\*:",        #   small i, dieresis or umlaut mark
1198     "Ntilde"    =>      'N\*~',         #   capital N, tilde
1199     "ntilde"    =>      'n\*~',         #   small n, tilde
1200     "Oacute"    =>      "O\\*'",        #   capital O, acute accent
1201     "oacute"    =>      "o\\*'",        #   small o, acute accent
1202     "Ocirc"     =>      "O\\*^",        #   capital O, circumflex accent
1203     "ocirc"     =>      "o\\*^",        #   small o, circumflex accent
1204     "Ograve"    =>      "O\\*`",        #   capital O, grave accent
1205     "ograve"    =>      "o\\*`",        #   small o, grave accent
1206     "Oslash"    =>      "O\\*/",        #   capital O, slash
1207     "oslash"    =>      "o\\*/",        #   small o, slash
1208     "Otilde"    =>      "O\\*~",        #   capital O, tilde
1209     "otilde"    =>      "o\\*~",        #   small o, tilde
1210     "Ouml"      =>      "O\\*:",        #   capital O, dieresis or umlaut mark
1211     "ouml"      =>      "o\\*:",        #   small o, dieresis or umlaut mark
1212     "szlig"     =>      '\*8',          #   small sharp s, German (sz ligature)
1213     "THORN"     =>      '\\*(Th',       #   capital THORN, Icelandic
1214     "thorn"     =>      '\\*(th',,      #   small thorn, Icelandic
1215     "Uacute"    =>      "U\\*'",        #   capital U, acute accent
1216     "uacute"    =>      "u\\*'",        #   small u, acute accent
1217     "Ucirc"     =>      "U\\*^",        #   capital U, circumflex accent
1218     "ucirc"     =>      "u\\*^",        #   small u, circumflex accent
1219     "Ugrave"    =>      "U\\*`",        #   capital U, grave accent
1220     "ugrave"    =>      "u\\*`",        #   small u, grave accent
1221     "Uuml"      =>      "U\\*:",        #   capital U, dieresis or umlaut mark
1222     "uuml"      =>      "u\\*:",        #   small u, dieresis or umlaut mark
1223     "Yacute"    =>      "Y\\*'",        #   capital Y, acute accent
1224     "yacute"    =>      "y\\*'",        #   small y, acute accent
1225     "yuml"      =>      "y\\*:",        #   small y, dieresis or umlaut mark
1226 );
1227 }
1228
1229 !NO!SUBS!
1230
1231 close OUT or die "Can't close $file: $!";
1232 chmod 0755, $file or die "Can't reset permissions for $file: $!\n";
1233 exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';