pod/pod2man.PL

   1 #!/usr/local/bin/perl
   2
   3 use Config;
   4 use File::Basename qw(&basename &dirname);
   5 use Cwd;
   6
   7 # List explicitly here the variables you want Configure to
   8 # generate.  Metaconfig only looks for shell variables, so you
   9 # have to mention them as if they were shell variables, not
  10 # %Config entries.  Thus you write
  11 #  $startperl
  12 #  $man3ext
  13 # to ensure Configure will look for $Config{startperl}.
  14
  15 # This forces PL files to create target in same directory as PL file.
  16 # This is so that make depend always knows where to find PL derivatives.
  17 $origdir = cwd;
  18 chdir dirname($0);
  19 $file = basename($0, '.PL');
  20 $file .= '.com' if $^O eq 'VMS';
  21
  22 open OUT,">$file" or die "Can't create $file: $!";
  23
  24 print "Extracting $file (with variable substitutions)\n";
  25
  26 # In this section, perl variables will be expanded during extraction.
  27 # You can use $Config{...} to use Configure variables.
  28
  29 print OUT <<"!GROK!THIS!";
  30 $Config{startperl}
  31     eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}'
  32         if \$running_under_some_shell;
  33
  34 \$DEF_PM_SECTION = '$Config{man3ext}' || '3';
  35 !GROK!THIS!
  36
  37 # In the following, perl variables are not expanded during extraction.
  38
  39 print OUT <<'!NO!SUBS!';
  40
  41 =head1 NAME
  42
  43 pod2man - translate embedded Perl pod directives into man pages
  44
  45 =head1 SYNOPSIS
  46
  47 B<pod2man>
  48 [ B<--section=>I<manext> ]
  49 [ B<--release=>I<relpatch> ]
  50 [ B<--center=>I<string> ]
  51 [ B<--date=>I<string> ]
  52 [ B<--fixed=>I<font> ]
  53 [ B<--official> ]
  54 [ B<--lax> ]
  55 I<inputfile>
  56
  57 =head1 DESCRIPTION
  58
  59 B<pod2man> converts its input file containing embedded pod directives (see
  60 L<perlpod>) into nroff source suitable for viewing with nroff(1) or
  61 troff(1) using the man(7) macro set.
  62
  63 Besides the obvious pod conversions, B<pod2man> also takes care of
  64 func(), func(n), and simple variable references like $foo or @bar so
  65 you don't have to use code escapes for them; complex expressions like
  66 C<$fred{'stuff'}> will still need to be escaped, though.  Other nagging
  67 little roffish things that it catches include translating the minus in
  68 something like foo-bar, making a long dash--like this--into a real em
  69 dash, fixing up "paired quotes", putting a little space after the
  70 parens in something like func(), making C++ and PI look right, making
  71 double underbars have a little tiny space between them, making ALLCAPS
  72 a teeny bit smaller in troff(1), and escaping backslashes so you don't
  73 have to.
  74
  75 =head1 OPTIONS
  76
  77 =over 8
  78
  79 =item center
  80
  81 Set the centered header to a specific string.  The default is
  82 "User Contributed Perl Documentation", unless the C<--official> flag is
  83 given, in which case the default is "Perl Programmers Reference Guide".
  84
  85 =item date
  86
  87 Set the left-hand footer string to this value.  By default,
  88 the modification date of the input file will be used.
  89
  90 =item fixed
  91
  92 The fixed font to use for code refs.  Defaults to CW.
  93
  94 =item official
  95
  96 Set the default header to indicate that this page is of
  97 the standard release in case C<--center> is not given.
  98
  99 =item release
 100
 101 Set the centered footer.  By default, this is the current
 102 perl release.
 103
 104 =item section
 105
 106 Set the section for the C<.TH> macro.  The standard conventions on
 107 sections are to use 1 for user commands,  2 for system calls, 3 for
 108 functions, 4 for devices, 5 for file formats, 6 for games, 7 for
 109 miscellaneous information, and 8 for administrator commands.  This works
 110 best if you put your Perl man pages in a separate tree, like
 111 F</usr/local/perl/man/>.  By default, section 1 will be used
 112 unless the file ends in F<.pm> in which case section 3 will be selected.
 113
 114 =item lax
 115
 116 Don't complain when required sections aren't present.
 117
 118 =back
 119
 120 =head1 Anatomy of a Proper Man Page
 121
 122 For those not sure of the proper layout of a man page, here's
 123 an example of the skeleton of a proper man page.  Head of the
 124 major headers should be setout as a C<=head1> directive, and
 125 are historically written in the rather startling ALL UPPER CASE
 126 format, although this is not mandatory.
 127 Minor headers may be included using C<=head2>, and are
 128 typically in mixed case.
 129
 130 =over 10
 131
 132 =item NAME
 133
 134 Mandatory section; should be a comma-separated list of programs or
 135 functions documented by this podpage, such as:
 136
 137     foo, bar - programs to do something
 138
 139 =item SYNOPSIS
 140
 141 A short usage summary for programs and functions, which
 142 may someday be deemed mandatory.
 143
 144 =item DESCRIPTION
 145
 146 Long drawn out discussion of the program.  It's a good idea to break this
 147 up into subsections using the C<=head2> directives, like
 148
 149     =head2 A Sample Subection
 150
 151     =head2 Yet Another Sample Subection
 152
 153 =item OPTIONS
 154
 155 Some people make this separate from the description.
 156
 157 =item RETURN VALUE
 158
 159 What the program or function returns if successful.
 160
 161 =item ERRORS
 162
 163 Exceptions, return codes, exit stati, and errno settings.
 164
 165 =item EXAMPLES
 166
 167 Give some example uses of the program.
 168
 169 =item ENVIRONMENT
 170
 171 Envariables this program might care about.
 172
 173 =item FILES
 174
 175 All files used by the program.  You should probably use the FE<lt>E<gt>
 176 for these.
 177
 178 =item SEE ALSO
 179
 180 Other man pages to check out, like man(1), man(7), makewhatis(8), or catman(8).
 181
 182 =item NOTES
 183
 184 Miscellaneous commentary.
 185
 186 =item CAVEATS
 187
 188 Things to take special care with; sometimes called WARNINGS.
 189
 190 =item DIAGNOSTICS
 191
 192 All possible messages the program can print out--and
 193 what they mean.
 194
 195 =item BUGS
 196
 197 Things that are broken or just don't work quite right.
 198
 199 =item RESTRICTIONS
 200
 201 Bugs you don't plan to fix :-)
 202
 203 =item AUTHOR
 204
 205 Who wrote it (or AUTHORS if multiple).
 206
 207 =item HISTORY
 208
 209 Programs derived from other sources sometimes have this, or
 210 you might keep a modification log here.
 211
 212 =back
 213
 214 =head1 EXAMPLES
 215
 216     pod2man program > program.1
 217     pod2man some_module.pm > /usr/perl/man/man3/some_module.3
 218     pod2man --section=7 note.pod > note.7
 219
 220 =head1 DIAGNOSTICS
 221
 222 The following diagnostics are generated by B<pod2man>.  Items
 223 marked "(W)" are non-fatal, whereas the "(F)" errors will cause
 224 B<pod2man> to immediately exit with a non-zero status.
 225
 226 =over 4
 227
 228 =item bad option in paragraph %d of %s: ``%s'' should be [%s]<%s>
 229
 230 (W) If you start include an option, you should set it off
 231 as bold, italic, or code.
 232
 233 =item can't open %s: %s
 234
 235 (F) The input file wasn't available for the given reason.
 236
 237 =item Improper man page - no dash in NAME header in paragraph %d of %s
 238
 239 (W) The NAME header did not have an isolated dash in it.  This is
 240 considered important.
 241
 242 =item Invalid man page - no NAME line in %s
 243
 244 (F) You did not include a NAME header, which is essential.
 245
 246 =item roff font should be 1 or 2 chars, not `%s'  (F)
 247
 248 (F) The font specified with the C<--fixed> option was not
 249 a one- or two-digit roff font.
 250
 251 =item %s is missing required section: %s
 252
 253 (W) Required sections include NAME, DESCRIPTION, and if you're
 254 using a section starting with a 3, also a SYNOPSIS.  Actually,
 255 not having a NAME is a fatal.
 256
 257 =item Unknown escape: %s in %s
 258
 259 (W) An unknown HTML entity (probably for an 8-bit character) was given via
 260 a C<EE<lt>E<gt>> directive.  Besides amp, lt, gt, and quot, recognized
 261 entities are Aacute, aacute, Acirc, acirc, AElig, aelig, Agrave, agrave,
 262 Aring, aring, Atilde, atilde, Auml, auml, Ccedil, ccedil, Eacute, eacute,
 263 Ecirc, ecirc, Egrave, egrave, ETH, eth, Euml, euml, Iacute, iacute, Icirc,
 264 icirc, Igrave, igrave, Iuml, iuml, Ntilde, ntilde, Oacute, oacute, Ocirc,
 265 ocirc, Ograve, ograve, Oslash, oslash, Otilde, otilde, Ouml, ouml, szlig,
 266 THORN, thorn, Uacute, uacute, Ucirc, ucirc, Ugrave, ugrave, Uuml, uuml,
 267 Yacute, yacute, and yuml.
 268
 269 =item Unmatched =back
 270
 271 (W) You have a C<=back> without a corresponding C<=over>.
 272
 273 =item Unrecognized pod directive: %s
 274
 275 (W) You specified a pod directive that isn't in the known list of
 276 C<=head1>, C<=head2>, C<=item>, C<=over>, C<=back>, or C<=cut>.
 277
 278
 279 =back
 280
 281 =head1 NOTES
 282
 283 If you would like to print out a lot of man page continuously, you
 284 probably want to set the C and D registers to set contiguous page
 285 numbering and even/odd paging, at least on some versions of man(7).
 286 Settting the F register will get you some additional experimental
 287 indexing:
 288
 289     troff -man -rC1 -rD1 -rF1 perl.1 perldata.1 perlsyn.1 ...
 290
 291 The indexing merely outputs messages via C<.tm> for each
 292 major page, section, subsection, item, and any C<XE<lt>E<gt>>
 293 directives.
 294
 295
 296 =head1 RESTRICTIONS
 297
 298 None at this time.
 299
 300 =head1 BUGS
 301
 302 The =over and =back directives don't really work right.  They
 303 take absolute positions instead of offsets, don't nest well, and
 304 making people count is suboptimal in any event.
 305
 306 =head1 AUTHORS
 307
 308 Original prototype by Larry Wall, but so massively hacked over by
 309 Tom Christiansen such that Larry probably doesn't recognize it anymore.
 310
 311 =cut
 312
 313 $/ = "";
 314 $cutting = 1;
 315 @Indices = ();
 316
 317 # We try first to get the version number from a local binary, in case we're
 318 # running an installed version of Perl to produce documentation from an
 319 # uninstalled newer version's pod files.
 320 if ($^O ne 'plan9' and $^O ne 'dos' and $^O ne 'os2' and $^O ne 'MSWin32') {
 321   my $perl = (-x './perl') ? './perl' : ((-x '../perl') ? '../perl' : '');
 322   ($version,$patch) = `$perl -e 'print $]'` =~ /^(\d\.\d{3})(\d{2})?/ if $perl;
 323 }
 324 # No luck; we'll just go with the running Perl's version
 325 ($version,$patch) = $] =~ /^(.{5})(\d{2})?/ unless $version;
 326 $DEF_RELEASE  = "perl $version";
 327 $DEF_RELEASE .= ", patch $patch" if $patch;
 328
 329
 330 sub makedate {
 331     my $secs = shift;
 332     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($secs);
 333     my $mname = (qw{Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec})[$mon];
 334     return "$mday/$mname/$year";
 335 }
 336
 337 use Getopt::Long;
 338
 339 $DEF_SECTION = 1;
 340 $DEF_CENTER = "User Contributed Perl Documentation";
 341 $STD_CENTER = "Perl Programmers Reference Guide";
 342 $DEF_FIXED = 'CW';
 343 $DEF_LAX = 0;
 344
 345 sub usage {
 346     warn "$0: @_\n" if @_;
 347     die <<EOF;
 348 usage: $0 [options] podpage
 349 Options are:
 350         --section=manext      (default "$DEF_SECTION")
 351         --release=relpatch    (default "$DEF_RELEASE")
 352         --center=string       (default "$DEF_CENTER")
 353         --date=string         (default "$DEF_DATE")
 354         --fixed=font          (default "$DEF_FIXED")
 355         --official            (default NOT)
 356         --lax                 (default NOT)
 357 EOF
 358 }
 359
 360 $uok = GetOptions( qw(
 361         section=s
 362         release=s
 363         center=s
 364         date=s
 365         fixed=s
 366         official
 367         lax
 368         help));
 369
 370 $DEF_DATE = makedate((stat($ARGV[0]))[9] || time());
 371
 372 usage("Usage error!") unless $uok;
 373 usage() if $opt_help;
 374 usage("Need one and only one podpage argument") unless @ARGV == 1;
 375
 376 $section = $opt_section || ($ARGV[0] =~ /\.pm$/
 377                                 ? $DEF_PM_SECTION : $DEF_SECTION);
 378 $RP = $opt_release || $DEF_RELEASE;
 379 $center = $opt_center || ($opt_official ? $STD_CENTER : $DEF_CENTER);
 380 $lax = $opt_lax || $DEF_LAX;
 381
 382 $CFont = $opt_fixed || $DEF_FIXED;
 383
 384 if (length($CFont) == 2) {
 385     $CFont_embed = "\\f($CFont";
 386 }
 387 elsif (length($CFont) == 1) {
 388     $CFont_embed = "\\f$CFont";
 389 }
 390 else {
 391     die "roff font should be 1 or 2 chars, not `$CFont_embed'";
 392 }
 393
 394 $date = $opt_date || $DEF_DATE;
 395
 396 for (qw{NAME DESCRIPTION}) {
 397 # for (qw{NAME DESCRIPTION AUTHOR}) {
 398     $wanna_see{$_}++;
 399 }
 400 $wanna_see{SYNOPSIS}++ if $section =~ /^3/;
 401
 402
 403 $name = @ARGV ? $ARGV[0] : "<STDIN>";
 404 $Filename = $name;
 405 if ($section =~ /^1/) {
 406     require File::Basename;
 407     $name = uc File::Basename::basename($name);
 408 }
 409 $name =~ s/\.(pod|p[lm])$//i;
 410
 411 # Lose everything up to the first of
 412 #     */lib/*perl*      standard or site_perl module
 413 #     */*perl*/lib      from -D prefix=/opt/perl
 414 #     */*perl*/         random module hierarchy
 415 # which works.
 416 $name =~ s-//+-/-g;
 417 if ($name =~ s-^.*?/lib/[^/]*perl[^/]*/--i
 418         or $name =~ s-^.*?/[^/]*perl[^/]*/lib/--i
 419         or $name =~ s-^.*?/[^/]*perl[^/]*/--i) {
 420     # Lose ^site(_perl)?/.
 421     $name =~ s-^site(_perl)?/--;
 422     # Lose ^arch/.      (XXX should we use Config? Just for archname?)
 423     $name =~ s~^(.*-$^O|$^O-.*)/~~o;
 424     # Lose ^version/.
 425     $name =~ s-^\d+\.\d+/--;
 426 }
 427
 428 # Translate Getopt/Long to Getopt::Long, etc.
 429 $name =~ s(/)(::)g;
 430
 431 if ($name ne 'something') {
 432     FCHECK: {
 433         open(F, "< $ARGV[0]") || die "can't open $ARGV[0]: $!";
 434         while (<F>) {
 435             next unless /^=\b/;
 436             if (/^=head1\s+NAME\s*$/) {  # an /m would forgive mistakes
 437                 $_ = <F>;
 438                 unless (/\s*-+\s+/) {
 439                     $oops++;
 440                     warn "$0: Improper man page - no dash in NAME header in paragraph $. of $ARGV[0]\n"
 441                 } else {
 442                     my @n = split /\s+-+\s+/;
 443                     if (@n != 2) {
 444                         $oops++;
 445                         warn "$0: Improper man page - malformed NAME header in paragraph $. of $ARGV[0]\n"
 446                     }
 447                     else {
 448                         %namedesc = @n;
 449                     }
 450                 }
 451                 last FCHECK;
 452             }
 453             next if /^=cut\b/;  # DB_File and Net::Ping have =cut before NAME
 454             next if /^=pod\b/;  # It is OK to have =pod before NAME
 455             die "$0: Invalid man page - 1st pod line is not NAME in $ARGV[0]\n" unless $lax;
 456         }
 457         die "$0: Invalid man page - no documentation in $ARGV[0]\n" unless $lax;
 458     }
 459     close F;
 460 }
 461
 462 print <<"END";
 463 .rn '' }`
 464 ''' \$RCSfile\$\$Revision\$\$Date\$
 465 '''
 466 ''' \$Log\$
 467 '''
 468 .de Sh
 469 .br
 470 .if t .Sp
 471 .ne 5
 472 .PP
 473 \\fB\\\\\$1\\fR
 474 .PP
 475 ..
 476 .de Sp
 477 .if t .sp .5v
 478 .if n .sp
 479 ..
 480 .de Ip
 481 .br
 482 .ie \\\\n(.\$>=3 .ne \\\\\$3
 483 .el .ne 3
 484 .IP "\\\\\$1" \\\\\$2
 485 ..
 486 .de Vb
 487 .ft $CFont
 488 .nf
 489 .ne \\\\\$1
 490 ..
 491 .de Ve
 492 .ft R
 493
 494 .fi
 495 ..
 496 '''
 497 '''
 498 '''     Set up \\*(-- to give an unbreakable dash;
 499 '''     string Tr holds user defined translation string.
 500 '''     Bell System Logo is used as a dummy character.
 501 '''
 502 .tr \\(*W-|\\(bv\\*(Tr
 503 .ie n \\{\\
 504 .ds -- \\(*W-
 505 .ds PI pi
 506 .if (\\n(.H=4u)&(1m=24u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-12u'-\\" diablo 10 pitch
 507 .if (\\n(.H=4u)&(1m=20u) .ds -- \\(*W\\h'-12u'\\(*W\\h'-8u'-\\" diablo 12 pitch
 508 .ds L" ""
 509 .ds R" ""
 510 '''   \\*(M", \\*(S", \\*(N" and \\*(T" are the equivalent of
 511 '''   \\*(L" and \\*(R", except that they are used on ".xx" lines,
 512 '''   such as .IP and .SH, which do another additional levels of
 513 '''   double-quote interpretation
 514 .ds M" """
 515 .ds S" """
 516 .ds N" """""
 517 .ds T" """""
 518 .ds L' '
 519 .ds R' '
 520 .ds M' '
 521 .ds S' '
 522 .ds N' '
 523 .ds T' '
 524 'br\\}
 525 .el\\{\\
 526 .ds -- \\(em\\|
 527 .tr \\*(Tr
 528 .ds L" ``
 529 .ds R" ''
 530 .ds M" ``
 531 .ds S" ''
 532 .ds N" ``
 533 .ds T" ''
 534 .ds L' `
 535 .ds R' '
 536 .ds M' `
 537 .ds S' '
 538 .ds N' `
 539 .ds T' '
 540 .ds PI \\(*p
 541 'br\\}
 542 END
 543
 544 print <<'END';
 545 .\"     If the F register is turned on, we'll generate
 546 .\"     index entries out stderr for the following things:
 547 .\"             TH      Title
 548 .\"             SH      Header
 549 .\"             Sh      Subsection
 550 .\"             Ip      Item
 551 .\"             X<>     Xref  (embedded
 552 .\"     Of course, you have to process the output yourself
 553 .\"     in some meaninful fashion.
 554 .if \nF \{
 555 .de IX
 556 .tm Index:\\$1\t\\n%\t"\\$2"
 557 ..
 558 .nr % 0
 559 .rr F
 560 .\}
 561 END
 562
 563 print <<"END";
 564 .TH $name $section "$RP" "$date" "$center"
 565 .UC
 566 END
 567
 568 push(@Indices, qq{.IX Title "$name $section"});
 569
 570 while (($name, $desc) = each %namedesc) {
 571     for ($name, $desc) { s/^\s+//; s/\s+$//; }
 572     push(@Indices, qq(.IX Name "$name - $desc"\n));
 573 }
 574
 575 print <<'END';
 576 .if n .hy 0
 577 .if n .na
 578 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
 579 .de CQ          \" put $1 in typewriter font
 580 END
 581 print ".ft $CFont\n";
 582 print <<'END';
 583 'if n "\c
 584 'if t \\&\\$1\c
 585 'if n \\&\\$1\c
 586 'if n \&"
 587 \\&\\$2 \\$3 \\$4 \\$5 \\$6 \\$7
 588 '.ft R
 589 ..
 590 .\" @(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2
 591 .       \" AM - accent mark definitions
 592 .bd B 3
 593 .       \" fudge factors for nroff and troff
 594 .if n \{\
 595 .       ds #H 0
 596 .       ds #V .8m
 597 .       ds #F .3m
 598 .       ds #[ \f1
 599 .       ds #] \fP
 600 .\}
 601 .if t \{\
 602 .       ds #H ((1u-(\\\\n(.fu%2u))*.13m)
 603 .       ds #V .6m
 604 .       ds #F 0
 605 .       ds #[ \&
 606 .       ds #] \&
 607 .\}
 608 .       \" simple accents for nroff and troff
 609 .if n \{\
 610 .       ds ' \&
 611 .       ds ` \&
 612 .       ds ^ \&
 613 .       ds , \&
 614 .       ds ~ ~
 615 .       ds ? ?
 616 .       ds ! !
 617 .       ds /
 618 .       ds q
 619 .\}
 620 .if t \{\
 621 .       ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
 622 .       ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
 623 .       ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
 624 .       ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
 625 .       ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
 626 .       ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
 627 .       ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
 628 .       ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
 629 .       ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
 630 .\}
 631 .       \" troff and (daisy-wheel) nroff accents
 632 .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
 633 .ds 8 \h'\*(#H'\(*b\h'-\*(#H'
 634 .ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
 635 .ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
 636 .ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
 637 .ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
 638 .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
 639 .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
 640 .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
 641 .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
 642 .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
 643 .ds ae a\h'-(\w'a'u*4/10)'e
 644 .ds Ae A\h'-(\w'A'u*4/10)'E
 645 .ds oe o\h'-(\w'o'u*4/10)'e
 646 .ds Oe O\h'-(\w'O'u*4/10)'E
 647 .       \" corrections for vroff
 648 .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
 649 .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
 650 .       \" for low resolution devices (crt and lpr)
 651 .if \n(.H>23 .if \n(.V>19 \
 652 \{\
 653 .       ds : e
 654 .       ds 8 ss
 655 .       ds v \h'-1'\o'\(aa\(ga'
 656 .       ds _ \h'-1'^
 657 .       ds . \h'-1'.
 658 .       ds 3 3
 659 .       ds o a
 660 .       ds d- d\h'-1'\(ga
 661 .       ds D- D\h'-1'\(hy
 662 .       ds th \o'bp'
 663 .       ds Th \o'LP'
 664 .       ds ae ae
 665 .       ds Ae AE
 666 .       ds oe oe
 667 .       ds Oe OE
 668 .\}
 669 .rm #[ #] #H #V #F C
 670 END
 671
 672 $indent = 0;
 673
 674 $begun = "";
 675
 676 # Unrolling [^A-Z>]|[A-Z](?!<) gives:    // MRE pp 165.
 677 my $nonest = '(?:[^A-Z>]*(?:[A-Z](?!<)[^A-Z>]*)*)';
 678
 679 while (<>) {
 680     if ($cutting) {
 681         next unless /^=/;
 682         $cutting = 0;
 683     }
 684     if ($begun) {
 685         if (/^=end\s+$begun/) {
 686             $begun = "";
 687         }
 688         elsif ($begun =~ /^(roff|man)$/) {
 689             print STDOUT $_;
 690         }
 691         next;
 692     }
 693     chomp;
 694
 695     # Translate verbatim paragraph
 696
 697     if (/^\s/) {
 698         @lines = split(/\n/);
 699         for (@lines) {
 700             1 while s
 701                 {^( [^\t]* ) \t ( \t* ) }
 702                 { $1 . ' ' x (8 - (length($1)%8) + 8 * (length($2))) }ex;
 703             s/\\/\\e/g;
 704             s/\A/\\&/s;
 705         }
 706         $lines = @lines;
 707         makespace() unless $verbatim++;
 708         print ".Vb $lines\n";
 709         print join("\n", @lines), "\n";
 710         print ".Ve\n";
 711         $needspace = 0;
 712         next;
 713     }
 714
 715     $verbatim = 0;
 716
 717     if (/^=for\s+(\S+)\s*/s) {
 718         if ($1 eq "man" or $1 eq "roff") {
 719             print STDOUT $',"\n\n";
 720         } else {
 721             # ignore unknown for
 722         }
 723         next;
 724     }
 725     elsif (/^=begin\s+(\S+)\s*/s) {
 726         $begun = $1;
 727         if ($1 eq "man" or $1 eq "roff") {
 728             print STDOUT $'."\n\n";
 729         }
 730         next;
 731     }
 732
 733     # check for things that'll hosed our noremap scheme; affects $_
 734     init_noremap();
 735
 736     if (!/^=item/) {
 737
 738         # trofficate backslashes; must do it before what happens below
 739         s/\\/noremap('\\e')/ge;
 740
 741         # protect leading periods and quotes against *roff
 742         # mistaking them for directives
 743         s/^(?:[A-Z]<)?[.']/\\&$&/gm;
 744
 745         # first hide the escapes in case we need to
 746         # intuit something and get it wrong due to fmting
 747
 748         1 while s/([A-Z]<$nonest>)/noremap($1)/ge;
 749
 750         # func() is a reference to a perl function
 751         s{
 752             \b
 753             (
 754                 [:\w]+ \(\)
 755             )
 756         } {I<$1>}gx;
 757
 758         # func(n) is a reference to a perl function or a man page
 759         s{
 760             ([:\w]+)
 761             (
 762                 \( [^\051]+ \)
 763             )
 764         } {I<$1>\\|$2}gx;
 765
 766         # convert simple variable references
 767         s/(\s+)([\$\@%][\w:]+)(?!\()/${1}C<$2>/g;
 768
 769         if (m{ (
 770                     [\-\w]+
 771                     \(
 772                         [^\051]*?
 773                         [\@\$,]
 774                         [^\051]*?
 775                     \)
 776                 )
 777             }x && $` !~ /([LCI]<[^<>]*|-)$/ && !/^=\w/)
 778         {
 779             warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [LCI]<$1>\n";
 780             $oops++;
 781         }
 782
 783         while (/(-[a-zA-Z])\b/g && $` !~ /[\w\-]$/) {
 784             warn "$0: bad option in paragraph $. of $ARGV: ``$1'' should be [CB]<$1>\n";
 785             $oops++;
 786         }
 787
 788         # put it back so we get the <> processed again;
 789         clear_noremap(0); # 0 means leave the E's
 790
 791     } else {
 792         # trofficate backslashes
 793         s/\\/noremap('\\e')/ge;
 794
 795     }
 796
 797     # need to hide E<> first; they're processed in clear_noremap
 798     s/(E<[^<>]+>)/noremap($1)/ge;
 799
 800
 801     $maxnest = 10;
 802     while ($maxnest-- && /[A-Z]</) {
 803
 804         # can't do C font here
 805         s/([BI])<($nonest)>/font($1) . $2 . font('R')/eg;
 806
 807         # files and filelike refs in italics
 808         s/F<($nonest)>/I<$1>/g;
 809
 810         # no break -- usually we want C<> for this
 811         s/S<($nonest)>/nobreak($1)/eg;
 812
 813         # LREF: a la HREF L<show this text|man/section>
 814         s:L<([^|>]+)\|[^>]+>:$1:g;
 815
 816         # LREF: a manpage(3f)
 817         s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the I<$1>$2 manpage:g;
 818
 819         # LREF: an =item on another manpage
 820         s{
 821             L<
 822                 ([^/]+)
 823                 /
 824                 (
 825                     [:\w]+
 826                     (\(\))?
 827                 )
 828             >
 829         } {the C<$2> entry in the I<$1> manpage}gx;
 830
 831         # LREF: an =item on this manpage
 832         s{
 833            ((?:
 834             L<
 835                 /
 836                 (
 837                     [:\w]+
 838                     (\(\))?
 839                 )
 840             >
 841             (,?\s+(and\s+)?)?
 842           )+)
 843         } { internal_lrefs($1) }gex;
 844
 845         # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
 846         # the "func" can disambiguate
 847         s{
 848             L<
 849                 (?:
 850                     ([a-zA-Z]\S+?) /
 851                 )?
 852                 "?(.*?)"?
 853             >
 854         }{
 855             do {
 856                 $1      # if no $1, assume it means on this page.
 857                     ?  "the section on I<$2> in the I<$1> manpage"
 858                     :  "the section on I<$2>"
 859             }
 860         }gesx; # s in case it goes over multiple lines, so . matches \n
 861
 862         s/Z<>/\\&/g;
 863
 864         # comes last because not subject to reprocessing
 865         s/C<($nonest)>/noremap("${CFont_embed}${1}\\fR")/eg;
 866     }
 867
 868     if (s/^=//) {
 869         $needspace = 0;         # Assume this.
 870
 871         s/\n/ /g;
 872
 873         ($Cmd, $_) = split(' ', $_, 2);
 874
 875         $dotlevel = 1;
 876         if ($Cmd eq 'head1') {
 877            $dotlevel = 1;
 878         }
 879         elsif ($Cmd eq 'head2') {
 880            $dotlevel = 1;
 881         }
 882         elsif ($Cmd eq 'item') {
 883            $dotlevel = 2;
 884         }
 885
 886         if (defined $_) {
 887             &escapes($dotlevel);
 888             s/"/""/g;
 889         }
 890
 891         clear_noremap(1);
 892
 893         if ($Cmd eq 'cut') {
 894             $cutting = 1;
 895         }
 896         elsif ($Cmd eq 'head1') {
 897             s/\s+$//;
 898             delete $wanna_see{$_} if exists $wanna_see{$_};
 899             print qq{.SH "$_"\n};
 900       push(@Indices, qq{.IX Header "$_"\n});
 901         }
 902         elsif ($Cmd eq 'head2') {
 903             print qq{.Sh "$_"\n};
 904       push(@Indices, qq{.IX Subsection "$_"\n});
 905         }
 906         elsif ($Cmd eq 'over') {
 907             push(@indent,$indent);
 908             $indent += ($_ + 0) || 5;
 909         }
 910         elsif ($Cmd eq 'back') {
 911             $indent = pop(@indent);
 912             warn "$0: Unmatched =back in paragraph $. of $ARGV\n" unless defined $indent;
 913             $needspace = 1;
 914         }
 915         elsif ($Cmd eq 'item') {
 916             s/^\*( |$)/\\(bu$1/g;
 917             # if you know how to get ":s please do
 918             s/\\\*\(L"([^"]+?)\\\*\(R"/'$1'/g;
 919             s/\\\*\(L"([^"]+?)""/'$1'/g;
 920             s/[^"]""([^"]+?)""[^"]/'$1'/g;
 921             # here do something about the $" in perlvar?
 922             print STDOUT qq{.Ip "$_" $indent\n};
 923       push(@Indices, qq{.IX Item "$_"\n});
 924         }
 925         elsif ($Cmd eq 'pod') {
 926             # this is just a comment
 927         }
 928         else {
 929             warn "$0: Unrecognized pod directive in paragraph $. of $ARGV: $Cmd\n";
 930         }
 931     }
 932     else {
 933         if ($needspace) {
 934             &makespace;
 935         }
 936         &escapes(0);
 937         clear_noremap(1);
 938         print $_, "\n";
 939         $needspace = 1;
 940     }
 941 }
 942
 943 print <<"END";
 944
 945 .rn }` ''
 946 END
 947
 948 if (%wanna_see && !$lax) {
 949     @missing = keys %wanna_see;
 950     warn "$0: $Filename is missing required section"
 951         .  (@missing > 1 && "s")
 952         .  ": @missing\n";
 953     $oops++;
 954 }
 955
 956 foreach (@Indices) { print "$_\n"; }
 957
 958 exit;
 959 #exit ($oops != 0);
 960
 961 #########################################################################
 962
 963 sub nobreak {
 964     my $string = shift;
 965     $string =~ s/ /\\ /g;
 966     $string;
 967 }
 968
 969 sub escapes {
 970     my $indot = shift;
 971
 972     s/X<(.*?)>/mkindex($1)/ge;
 973
 974     # translate the minus in foo-bar into foo\-bar for roff
 975     s/([^0-9a-z-])-([^-])/$1\\-$2/g;
 976
 977     # make -- into the string version \*(-- (defined above)
 978     s/\b--\b/\\*(--/g;
 979     s/"--([^"])/"\\*(--$1/g;  # should be a better way
 980     s/([^"])--"/$1\\*(--"/g;
 981
 982     # fix up quotes; this is somewhat tricky
 983     my $dotmacroL = 'L';
 984     my $dotmacroR = 'R';
 985     if ( $indot == 1 ) {
 986         $dotmacroL = 'M';
 987         $dotmacroR = 'S';
 988     }
 989     elsif ( $indot >= 2 ) {
 990         $dotmacroL = 'N';
 991         $dotmacroR = 'T';
 992     }
 993     if (!/""/) {
 994         s/(^|\s)(['"])/noremap("$1\\*($dotmacroL$2")/ge;
 995         s/(['"])($|[\-\s,;\\!?.])/noremap("\\*($dotmacroR$1$2")/ge;
 996     }
 997
 998     #s/(?!")(?:.)--(?!")(?:.)/\\*(--/g;
 999     #s/(?:(?!")(?:.)--(?:"))|(?:(?:")--(?!")(?:.))/\\*(--/g;
1000
1001
1002     # make sure that func() keeps a bit a space tween the parens
1003     ### s/\b\(\)/\\|()/g;
1004     ### s/\b\(\)/(\\|)/g;
1005
1006     # make C++ into \*C+, which is a squinched version (defined above)
1007     s/\bC\+\+/\\*(C+/g;
1008
1009     # make double underbars have a little tiny space between them
1010     s/__/_\\|_/g;
1011
1012     # PI goes to \*(PI (defined above)
1013     s/\bPI\b/noremap('\\*(PI')/ge;
1014
1015     # make all caps a teeny bit smaller, but don't muck with embedded code literals
1016     my $hidCFont = font('C');
1017     if ($Cmd !~ /^head1/) { # SH already makes smaller
1018         # /g isn't enough; 1 while or we'll be off
1019
1020 #       1 while s{
1021 #           (?!$hidCFont)(..|^.|^)
1022 #           \b
1023 #           (
1024 #               [A-Z][\/A-Z+:\-\d_$.]+
1025 #           )
1026 #           (s?)
1027 #           \b
1028 #       } {$1\\s-1$2\\s0}gmox;
1029
1030         1 while s{
1031             (?!$hidCFont)(..|^.|^)
1032             (
1033                 \b[A-Z]{2,}[\/A-Z+:\-\d_\$]*\b
1034             )
1035         } {
1036             $1 . noremap( '\\s-1' .  $2 . '\\s0' )
1037         }egmox;
1038
1039     }
1040 }
1041
1042 # make troff just be normal, but make small nroff get quoted
1043 # decided to just put the quotes in the text; sigh;
1044 sub ccvt {
1045     local($_,$prev) = @_;
1046     noremap(qq{.CQ "$_" \n\\&});
1047 }
1048
1049 sub makespace {
1050     if ($indent) {
1051         print ".Sp\n";
1052     }
1053     else {
1054         print ".PP\n";
1055     }
1056 }
1057
1058 sub mkindex {
1059     my ($entry) = @_;
1060     my @entries = split m:\s*/\s*:, $entry;
1061     push @Indices, ".IX Xref " . join ' ', map {qq("$_")} @entries;
1062     return '';
1063 }
1064
1065 sub font {
1066     local($font) = shift;
1067     return '\\f' . noremap($font);
1068 }
1069
1070 sub noremap {
1071     local($thing_to_hide) = shift;
1072     $thing_to_hide =~ tr/\000-\177/\200-\377/;
1073     return $thing_to_hide;
1074 }
1075
1076 sub init_noremap {
1077         # escape high bit characters in input stream
1078         s/([\200-\377])/"E<".ord($1).">"/ge;
1079 }
1080
1081 sub clear_noremap {
1082     my $ready_to_print = $_[0];
1083
1084     tr/\200-\377/\000-\177/;
1085
1086     # trofficate backslashes
1087     # s/(?!\\e)(?:..|^.|^)\\/\\e/g;
1088
1089     # now for the E<>s, which have been hidden until now
1090     # otherwise the interative \w<> processing would have
1091     # been hosed by the E<gt>
1092     s {
1093             E<
1094             (
1095                 ( \d + )
1096                 | ( [A-Za-z]+ )
1097             )
1098             >
1099     } {
1100          do {
1101              defined $2
1102                 ? chr($2)
1103                 :
1104              exists $HTML_Escapes{$3}
1105                 ? do { $HTML_Escapes{$3} }
1106                 : do {
1107                     warn "$0: Unknown escape in paragraph $. of $ARGV: ``$&''\n";
1108                     "E<$1>";
1109                 }
1110          }
1111     }egx if $ready_to_print;
1112 }
1113
1114 sub internal_lrefs {
1115     local($_) = shift;
1116     local $trailing_and = s/and\s+$// ? "and " : "";
1117
1118     s{L</([^>]+)>}{$1}g;
1119     my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
1120     my $retstr = "the ";
1121     my $i;
1122     for ($i = 0; $i <= $#items; $i++) {
1123         $retstr .= "C<$items[$i]>";
1124         $retstr .= ", " if @items > 2 && $i != $#items;
1125         $retstr .= " and " if $i+2 == @items;
1126     }
1127
1128     $retstr .= " entr" . ( @items > 1  ? "ies" : "y" )
1129             .  " elsewhere in this document";
1130     # terminal space to avoid words running together (pattern used
1131     # strips terminal spaces)
1132     $retstr .= " " if length $trailing_and;
1133     $retstr .=  $trailing_and;
1134
1135     return $retstr;
1136
1137 }
1138
1139 BEGIN {
1140 %HTML_Escapes = (
1141     'amp'       =>      '&',    #   ampersand
1142     'lt'        =>      '<',    #   left chevron, less-than
1143     'gt'        =>      '>',    #   right chevron, greater-than
1144     'quot'      =>      '"',    #   double quote
1145
1146     "Aacute"    =>      "A\\*'",        #   capital A, acute accent
1147     "aacute"    =>      "a\\*'",        #   small a, acute accent
1148     "Acirc"     =>      "A\\*^",        #   capital A, circumflex accent
1149     "acirc"     =>      "a\\*^",        #   small a, circumflex accent
1150     "AElig"     =>      '\*(AE',        #   capital AE diphthong (ligature)
1151     "aelig"     =>      '\*(ae',        #   small ae diphthong (ligature)
1152     "Agrave"    =>      "A\\*`",        #   capital A, grave accent
1153     "agrave"    =>      "A\\*`",        #   small a, grave accent
1154     "Aring"     =>      'A\\*o',        #   capital A, ring
1155     "aring"     =>      'a\\*o',        #   small a, ring
1156     "Atilde"    =>      'A\\*~',        #   capital A, tilde
1157     "atilde"    =>      'a\\*~',        #   small a, tilde
1158     "Auml"      =>      'A\\*:',        #   capital A, dieresis or umlaut mark
1159     "auml"      =>      'a\\*:',        #   small a, dieresis or umlaut mark
1160     "Ccedil"    =>      'C\\*,',        #   capital C, cedilla
1161     "ccedil"    =>      'c\\*,',        #   small c, cedilla
1162     "Eacute"    =>      "E\\*'",        #   capital E, acute accent
1163     "eacute"    =>      "e\\*'",        #   small e, acute accent
1164     "Ecirc"     =>      "E\\*^",        #   capital E, circumflex accent
1165     "ecirc"     =>      "e\\*^",        #   small e, circumflex accent
1166     "Egrave"    =>      "E\\*`",        #   capital E, grave accent
1167     "egrave"    =>      "e\\*`",        #   small e, grave accent
1168     "ETH"       =>      '\\*(D-',       #   capital Eth, Icelandic
1169     "eth"       =>      '\\*(d-',       #   small eth, Icelandic
1170     "Euml"      =>      "E\\*:",        #   capital E, dieresis or umlaut mark
1171     "euml"      =>      "e\\*:",        #   small e, dieresis or umlaut mark
1172     "Iacute"    =>      "I\\*'",        #   capital I, acute accent
1173     "iacute"    =>      "i\\*'",        #   small i, acute accent
1174     "Icirc"     =>      "I\\*^",        #   capital I, circumflex accent
1175     "icirc"     =>      "i\\*^",        #   small i, circumflex accent
1176     "Igrave"    =>      "I\\*`",        #   capital I, grave accent
1177     "igrave"    =>      "i\\*`",        #   small i, grave accent
1178     "Iuml"      =>      "I\\*:",        #   capital I, dieresis or umlaut mark
1179     "iuml"      =>      "i\\*:",        #   small i, dieresis or umlaut mark
1180     "Ntilde"    =>      'N\*~',         #   capital N, tilde
1181     "ntilde"    =>      'n\*~',         #   small n, tilde
1182     "Oacute"    =>      "O\\*'",        #   capital O, acute accent
1183     "oacute"    =>      "o\\*'",        #   small o, acute accent
1184     "Ocirc"     =>      "O\\*^",        #   capital O, circumflex accent
1185     "ocirc"     =>      "o\\*^",        #   small o, circumflex accent
1186     "Ograve"    =>      "O\\*`",        #   capital O, grave accent
1187     "ograve"    =>      "o\\*`",        #   small o, grave accent
1188     "Oslash"    =>      "O\\*/",        #   capital O, slash
1189     "oslash"    =>      "o\\*/",        #   small o, slash
1190     "Otilde"    =>      "O\\*~",        #   capital O, tilde
1191     "otilde"    =>      "o\\*~",        #   small o, tilde
1192     "Ouml"      =>      "O\\*:",        #   capital O, dieresis or umlaut mark
1193     "ouml"      =>      "o\\*:",        #   small o, dieresis or umlaut mark
1194     "szlig"     =>      '\*8',          #   small sharp s, German (sz ligature)
1195     "THORN"     =>      '\\*(Th',       #   capital THORN, Icelandic
1196     "thorn"     =>      '\\*(th',,      #   small thorn, Icelandic
1197     "Uacute"    =>      "U\\*'",        #   capital U, acute accent
1198     "uacute"    =>      "u\\*'",        #   small u, acute accent
1199     "Ucirc"     =>      "U\\*^",        #   capital U, circumflex accent
1200     "ucirc"     =>      "u\\*^",        #   small u, circumflex accent
1201     "Ugrave"    =>      "U\\*`",        #   capital U, grave accent
1202     "ugrave"    =>      "u\\*`",        #   small u, grave accent
1203     "Uuml"      =>      "U\\*:",        #   capital U, dieresis or umlaut mark
1204     "uuml"      =>      "u\\*:",        #   small u, dieresis or umlaut mark
1205     "Yacute"    =>      "Y\\*'",        #   capital Y, acute accent
1206     "yacute"    =>      "y\\*'",        #   small y, acute accent
1207     "yuml"      =>      "y\\*:",        #   small y, dieresis or umlaut mark
1208 );
1209 }
1210
1211 !NO!SUBS!
1212
1213 close OUT or die "Can't close $file: $!";
1214 chmod 0755, $file or die "Can't reset permissions for $file: $!\n";
1215 exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';
1216 chdir $origdir;