lib/Pod/Html.pm

   1 package Pod::Html;
   2
   3 use Pod::Functions;
   4 use Getopt::Long;       # package for handling command-line parameters
   5 require Exporter;
   6 @ISA = Exporter;
   7 @EXPORT = qw(pod2html htmlify);
   8 use Cwd;
   9
  10 use Carp;
  11
  12 use strict;
  13
  14 =head1 NAME
  15
  16 Pod::HTML - module to convert pod files to HTML
  17
  18 =head1 SYNOPSIS
  19
  20     use Pod::HTML;
  21     pod2html([options]);
  22
  23 =head1 DESCRIPTION
  24
  25 Converts files from pod format (see L<perlpod>) to HTML format.  It
  26 can automatically generate indexes and cross-references, and it keeps
  27 a cache of things it knows how to cross-reference.
  28
  29 =head1 ARGUMENTS
  30
  31 Pod::Html takes the following arguments:
  32
  33 =over 4
  34
  35 =item help
  36
  37     --help
  38
  39 Displays the usage message.
  40
  41 =item htmlroot
  42
  43     --htmlroot=name
  44
  45 Sets the base URL for the HTML files.  When cross-references are made,
  46 the HTML root is prepended to the URL.
  47
  48 =item infile
  49
  50     --infile=name
  51
  52 Specify the pod file to convert.  Input is taken from STDIN if no
  53 infile is specified.
  54
  55 =item outfile
  56
  57     --outfile=name
  58
  59 Specify the HTML file to create.  Output goes to STDOUT if no outfile
  60 is specified.
  61
  62 =item podroot
  63
  64     --podroot=name
  65
  66 Specify the base directory for finding library pods.
  67
  68 =item podpath
  69
  70     --podpath=name:...:name
  71
  72 Specify which subdirectories of the podroot contain pod files whose
  73 HTML converted forms can be linked-to in cross-references.
  74
  75 =item libpods
  76
  77     --libpods=name:...:name
  78
  79 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
  80
  81 =item netscape
  82
  83     --netscape
  84
  85 Use Netscape HTML directives when applicable.
  86
  87 =item nonetscape
  88
  89     --nonetscape
  90
  91 Do not use Netscape HTML directives (default).
  92
  93 =item index
  94
  95     --index
  96
  97 Generate an index at the top of the HTML file (default behaviour).
  98
  99 =item noindex
 100
 101     --noindex
 102
 103 Do not generate an index at the top of the HTML file.
 104
 105
 106 =item recurse
 107
 108     --recurse
 109
 110 Recurse into subdirectories specified in podpath (default behaviour).
 111
 112 =item norecurse
 113
 114     --norecurse
 115
 116 Do not recurse into subdirectories specified in podpath.
 117
 118 =item title
 119
 120     --title=title
 121
 122 Specify the title of the resulting HTML file.
 123
 124 =item verbose
 125
 126     --verbose
 127
 128 Display progress messages.
 129
 130 =back
 131
 132 =head1 EXAMPLE
 133
 134     pod2html("pod2html",
 135              "--podpath=lib:ext:pod:vms",
 136              "--podroot=/usr/src/perl",
 137              "--htmlroot=/perl/nmanual",
 138              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
 139              "--recurse",
 140              "--infile=foo.pod",
 141              "--outfile=/perl/nmanual/foo.html");
 142
 143 =head1 AUTHOR
 144
 145 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 146
 147 =head1 BUGS
 148
 149 Has trouble with C<> etc in = commands.
 150
 151 =head1 SEE ALSO
 152
 153 L<perlpod>
 154
 155 =head1 COPYRIGHT
 156
 157 This program is distributed under the Artistic License.
 158
 159 =cut
 160
 161 my $dircache = "pod2html-dircache";
 162 my $itemcache = "pod2html-itemcache";
 163
 164 my @begin_stack = ();           # begin/end stack
 165
 166 my @libpods = ();               # files to search for links from C<> directives
 167 my $htmlroot = "/";             # http-server base directory from which all
 168                                 #   relative paths in $podpath stem.
 169 my $htmlfile = "";              # write to stdout by default
 170 my $podfile = "";               # read from stdin by default
 171 my @podpath = ();               # list of directories containing library pods.
 172 my $podroot = ".";              # filesystem base directory from which all
 173                                 #   relative paths in $podpath stem.
 174 my $recurse = 1;                # recurse on subdirectories in $podpath.
 175 my $verbose = 0;                # not verbose by default
 176 my $doindex = 1;                # non-zero if we should generate an index
 177 my $listlevel = 0;              # current list depth
 178 my @listitem = ();              # stack of HTML commands to use when a =item is
 179                                 #   encountered.  the top of the stack is the
 180                                 #   current list.
 181 my @listdata = ();              # similar to @listitem, but for the text after
 182                                 #   an =item
 183 my @listend = ();               # similar to @listitem, but the text to use to
 184                                 #   end the list.
 185 my $ignore = 1;                 # whether or not to format text.  we don't
 186                                 #   format text until we hit our first pod
 187                                 #   directive.
 188
 189 my %items_named = ();           # for the multiples of the same item in perlfunc
 190 my @items_seen = ();
 191 my $netscape = 0;               # whether or not to use netscape directives.
 192 my $title;                      # title to give the pod(s)
 193 my $top = 1;                    # true if we are at the top of the doc.  used
 194                                 #   to prevent the first <HR> directive.
 195 my $paragraph;                  # which paragraph we're processing (used
 196                                 #   for error messages)
 197 my %pages = ();                 # associative array used to find the location
 198                                 #   of pages referenced by L<> links.
 199 my %sections = ();              # sections within this page
 200 my %items = ();                 # associative array used to find the location
 201                                 #   of =item directives referenced by C<> links
 202 my $Is83;                       # is dos with short filenames (8.3)
 203
 204 sub init_globals {
 205 $dircache = "pod2html-dircache";
 206 $itemcache = "pod2html-itemcache";
 207
 208 @begin_stack = ();              # begin/end stack
 209
 210 @libpods = ();          # files to search for links from C<> directives
 211 $htmlroot = "/";                # http-server base directory from which all
 212                                 #   relative paths in $podpath stem.
 213 $htmlfile = "";         # write to stdout by default
 214 $podfile = "";          # read from stdin by default
 215 @podpath = ();          # list of directories containing library pods.
 216 $podroot = ".";         # filesystem base directory from which all
 217                                 #   relative paths in $podpath stem.
 218 $recurse = 1;           # recurse on subdirectories in $podpath.
 219 $verbose = 0;           # not verbose by default
 220 $doindex = 1;                   # non-zero if we should generate an index
 221 $listlevel = 0;         # current list depth
 222 @listitem = ();         # stack of HTML commands to use when a =item is
 223                                 #   encountered.  the top of the stack is the
 224                                 #   current list.
 225 @listdata = ();         # similar to @listitem, but for the text after
 226                                 #   an =item
 227 @listend = ();          # similar to @listitem, but the text to use to
 228                                 #   end the list.
 229 $ignore = 1;                    # whether or not to format text.  we don't
 230                                 #   format text until we hit our first pod
 231                                 #   directive.
 232
 233 @items_seen = ();
 234 %items_named = ();
 235 $netscape = 0;          # whether or not to use netscape directives.
 236 $title = '';                    # title to give the pod(s)
 237 $top = 1;                       # true if we are at the top of the doc.  used
 238                                 #   to prevent the first <HR> directive.
 239 $paragraph = '';                        # which paragraph we're processing (used
 240                                 #   for error messages)
 241 %sections = ();         # sections within this page
 242
 243 # These are not reinitialised here but are kept as a cache.
 244 # See get_cache and related cache management code.
 245 #%pages = ();                   # associative array used to find the location
 246                                 #   of pages referenced by L<> links.
 247 #%items = ();                   # associative array used to find the location
 248                                 #   of =item directives referenced by C<> links
 249 $Is83=$^O eq 'dos';
 250 }
 251
 252 sub pod2html {
 253     local(@ARGV) = @_;
 254     local($/);
 255     local $_;
 256
 257     init_globals();
 258
 259     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 260
 261     # cache of %pages and %items from last time we ran pod2html
 262
 263     #undef $opt_help if defined $opt_help;
 264
 265     # parse the command-line parameters
 266     parse_command_line();
 267
 268     # set some variables to their default values if necessary
 269     local *POD;
 270     unless (@ARGV && $ARGV[0]) {
 271         $podfile  = "-" unless $podfile;        # stdin
 272         open(POD, "<$podfile")
 273                 || die "$0: cannot open $podfile file for input: $!\n";
 274     } else {
 275         $podfile = $ARGV[0];  # XXX: might be more filenames
 276         *POD = *ARGV;
 277     }
 278     $htmlfile = "-" unless $htmlfile;   # stdout
 279     $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
 280
 281     # read the pod a paragraph at a time
 282     warn "Scanning for sections in input file(s)\n" if $verbose;
 283     $/ = "";
 284     my @poddata  = <POD>;
 285     close(POD);
 286
 287     # scan the pod for =head[1-6] directives and build an index
 288     my $index = scan_headings(\%sections, @poddata);
 289
 290     unless($index) {
 291         warn "No pod in $podfile\n" if $verbose;
 292         return;
 293     }
 294
 295     # open the output file
 296     open(HTML, ">$htmlfile")
 297             || die "$0: cannot open $htmlfile file for output: $!\n";
 298
 299     # put a title in the HTML file
 300     $title = '';
 301     TITLE_SEARCH: {
 302         for (my $i = 0; $i < @poddata; $i++) {
 303             if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 304                 for my $para ( @poddata[$i, $i+1] ) {
 305                     last TITLE_SEARCH if ($title) = $para =~ /(\S+\s+-+\s*.*)/s;
 306                 }
 307             }
 308
 309         }
 310     }
 311     if (!$title and $podfile =~ /\.pod$/) {
 312         # probably a split pod so take first =head[12] as title
 313         for (my $i = 0; $i < @poddata; $i++) {
 314             last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 315         }
 316         warn "adopted '$title' as title for $podfile\n"
 317             if $verbose and $title;
 318     }
 319     unless ($title) {
 320         warn "$0: no title for $podfile";
 321         $podfile =~ /^(.*)(\.[^.\/]+)?$/;
 322         $title = ($podfile eq "-" ? 'No Title' : $1);
 323         warn "using $title" if $verbose;
 324     }
 325     print HTML <<END_OF_HEAD;
 326     <HTML>
 327         <HEAD>
 328             <TITLE>$title</TITLE>
 329         </HEAD>
 330
 331         <BODY>
 332
 333 END_OF_HEAD
 334
 335     # load/reload/validate/cache %pages and %items
 336     get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
 337
 338     # scan the pod for =item directives
 339     scan_items("", \%items, @poddata);
 340
 341     # put an index at the top of the file.  note, if $doindex is 0 we
 342     # still generate an index, but surround it with an html comment.
 343     # that way some other program can extract it if desired.
 344     $index =~ s/--+/-/g;
 345     print HTML "<!-- INDEX BEGIN -->\n";
 346     print HTML "<!--\n" unless $doindex;
 347     print HTML $index;
 348     print HTML "-->\n" unless $doindex;
 349     print HTML "<!-- INDEX END -->\n\n";
 350     print HTML "<HR>\n" if $doindex;
 351
 352     # now convert this file
 353     warn "Converting input file\n" if $verbose;
 354     foreach my $i (0..$#poddata) {
 355         $_ = $poddata[$i];
 356         $paragraph = $i+1;
 357         if (/^(=.*)/s) {        # is it a pod directive?
 358             $ignore = 0;
 359             $_ = $1;
 360             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 361                 process_begin($1, $2);
 362             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 363                 process_end($1, $2);
 364             } elsif (/^=cut/) {                 # =cut
 365                 process_cut();
 366             } elsif (/^=pod/) {                 # =pod
 367                 process_pod();
 368             } else {
 369                 next if @begin_stack && $begin_stack[-1] ne 'html';
 370
 371                 if (/^=(head[1-6])\s+(.*)/s) {  # =head[1-6] heading
 372                     process_head($1, $2);
 373                 } elsif (/^=item\s*(.*)/sm) {   # =item text
 374                     process_item($1);
 375                 } elsif (/^=over\s*(.*)/) {             # =over N
 376                     process_over();
 377                 } elsif (/^=back/) {            # =back
 378                     process_back();
 379                 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
 380                     process_for($1,$2);
 381                 } else {
 382                     /^=(\S*)\s*/;
 383                     warn "$0: $podfile: unknown pod directive '$1' in "
 384                        . "paragraph $paragraph.  ignoring.\n";
 385                 }
 386             }
 387             $top = 0;
 388         }
 389         else {
 390             next if $ignore;
 391             next if @begin_stack && $begin_stack[-1] ne 'html';
 392             my $text = $_;
 393             process_text(\$text, 1);
 394             print HTML "$text\n<P>\n\n";
 395         }
 396     }
 397
 398     # finish off any pending directives
 399     finish_list();
 400     print HTML <<END_OF_TAIL;
 401     </BODY>
 402
 403     </HTML>
 404 END_OF_TAIL
 405
 406     # close the html file
 407     close(HTML);
 408
 409     warn "Finished\n" if $verbose;
 410 }
 411
 412 ##############################################################################
 413
 414 my $usage;                      # see below
 415 sub usage {
 416     my $podfile = shift;
 417     warn "$0: $podfile: @_\n" if @_;
 418     die $usage;
 419 }
 420
 421 $usage =<<END_OF_USAGE;
 422 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 423            --podpath=<name>:...:<name> --podroot=<name>
 424            --libpods=<name>:...:<name> --recurse --verbose --index
 425            --netscape --norecurse --noindex
 426
 427   --flush      - flushes the item and directory caches.
 428   --help       - prints this message.
 429   --htmlroot   - http-server base directory from which all relative paths
 430                  in podpath stem (default is /).
 431   --index      - generate an index at the top of the resulting html
 432                  (default).
 433   --infile     - filename for the pod to convert (input taken from stdin
 434                  by default).
 435   --libpods    - colon-separated list of pages to search for =item pod
 436                  directives in as targets of C<> and implicit links (empty
 437                  by default).  note, these are not filenames, but rather
 438                  page names like those that appear in L<> links.
 439   --netscape   - will use netscape html directives when applicable.
 440   --nonetscape - will not use netscape directives (default).
 441   --outfile    - filename for the resulting html file (output sent to
 442                  stdout by default).
 443   --podpath    - colon-separated list of directories containing library
 444                  pods.  empty by default.
 445   --podroot    - filesystem base directory from which all relative paths
 446                  in podpath stem (default is .).
 447   --noindex    - don't generate an index at the top of the resulting html.
 448   --norecurse  - don't recurse on those subdirectories listed in podpath.
 449   --recurse    - recurse on those subdirectories listed in podpath
 450                  (default behavior).
 451   --title      - title that will appear in resulting html file.
 452   --verbose    - self-explanatory
 453
 454 END_OF_USAGE
 455
 456 sub parse_command_line {
 457     my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
 458     my $result = GetOptions(
 459                             'flush'      => \$opt_flush,
 460                             'help'       => \$opt_help,
 461                             'htmlroot=s' => \$opt_htmlroot,
 462                             'index!'     => \$opt_index,
 463                             'infile=s'   => \$opt_infile,
 464                             'libpods=s'  => \$opt_libpods,
 465                             'netscape!'  => \$opt_netscape,
 466                             'outfile=s'  => \$opt_outfile,
 467                             'podpath=s'  => \$opt_podpath,
 468                             'podroot=s'  => \$opt_podroot,
 469                             'norecurse'  => \$opt_norecurse,
 470                             'recurse!'   => \$opt_recurse,
 471                             'title=s'    => \$opt_title,
 472                             'verbose'    => \$opt_verbose,
 473                            );
 474     usage("-", "invalid parameters") if not $result;
 475
 476     usage("-") if defined $opt_help;    # see if the user asked for help
 477     $opt_help = "";                     # just to make -w shut-up.
 478
 479     $podfile  = $opt_infile if defined $opt_infile;
 480     $htmlfile = $opt_outfile if defined $opt_outfile;
 481
 482     @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 483     @libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 484
 485     warn "Flushing item and directory caches\n"
 486         if $opt_verbose && defined $opt_flush;
 487     unlink($dircache, $itemcache) if defined $opt_flush;
 488
 489     $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 490     $podroot  = $opt_podroot if defined $opt_podroot;
 491
 492     $doindex  = $opt_index if defined $opt_index;
 493     $recurse  = $opt_recurse if defined $opt_recurse;
 494     $title    = $opt_title if defined $opt_title;
 495     $verbose  = defined $opt_verbose ? 1 : 0;
 496     $netscape = $opt_netscape if defined $opt_netscape;
 497 }
 498
 499
 500 my $saved_cache_key;
 501
 502 sub get_cache {
 503     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 504     my @cache_key_args = @_;
 505
 506     # A first-level cache:
 507     # Don't bother reading the cache files if they still apply
 508     # and haven't changed since we last read them.
 509
 510     my $this_cache_key = cache_key(@cache_key_args);
 511
 512     return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
 513
 514     # load the cache of %pages and %items if possible.  $tests will be
 515     # non-zero if successful.
 516     my $tests = 0;
 517     if (-f $dircache && -f $itemcache) {
 518         warn "scanning for item cache\n" if $verbose;
 519         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 520     }
 521
 522     # if we didn't succeed in loading the cache then we must (re)build
 523     #  %pages and %items.
 524     if (!$tests) {
 525         warn "scanning directories in pod-path\n" if $verbose;
 526         scan_podpath($podroot, $recurse, 0);
 527     }
 528     $saved_cache_key = cache_key(@cache_key_args);
 529 }
 530
 531 sub cache_key {
 532     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 533     return join('!', $dircache, $itemcache, $recurse,
 534                 @$podpath, $podroot, stat($dircache), stat($itemcache));
 535 }
 536
 537 #
 538 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 539 #  are valid caches of %pages and %items.  if they are valid then it loads
 540 #  them and returns a non-zero value.
 541 #
 542
 543 sub load_cache {
 544     my($dircache, $itemcache, $podpath, $podroot) = @_;
 545     my($tests);
 546     local $_;
 547
 548     $tests = 0;
 549
 550     open(CACHE, "<$itemcache") ||
 551         die "$0: error opening $itemcache for reading: $!\n";
 552     $/ = "\n";
 553
 554     # is it the same podpath?
 555     $_ = <CACHE>;
 556     chomp($_);
 557     $tests++ if (join(":", @$podpath) eq $_);
 558
 559     # is it the same podroot?
 560     $_ = <CACHE>;
 561     chomp($_);
 562     $tests++ if ($podroot eq $_);
 563
 564     # load the cache if its good
 565     if ($tests != 2) {
 566         close(CACHE);
 567         return 0;
 568     }
 569
 570     warn "loading item cache\n" if $verbose;
 571     while (<CACHE>) {
 572         /(.*?) (.*)$/;
 573         $items{$1} = $2;
 574     }
 575     close(CACHE);
 576
 577     warn "scanning for directory cache\n" if $verbose;
 578     open(CACHE, "<$dircache") ||
 579         die "$0: error opening $dircache for reading: $!\n";
 580     $/ = "\n";
 581     $tests = 0;
 582
 583     # is it the same podpath?
 584     $_ = <CACHE>;
 585     chomp($_);
 586     $tests++ if (join(":", @$podpath) eq $_);
 587
 588     # is it the same podroot?
 589     $_ = <CACHE>;
 590     chomp($_);
 591     $tests++ if ($podroot eq $_);
 592
 593     # load the cache if its good
 594     if ($tests != 2) {
 595         close(CACHE);
 596         return 0;
 597     }
 598
 599     warn "loading directory cache\n" if $verbose;
 600     while (<CACHE>) {
 601         /(.*?) (.*)$/;
 602         $pages{$1} = $2;
 603     }
 604
 605     close(CACHE);
 606
 607     return 1;
 608 }
 609
 610 #
 611 # scan_podpath - scans the directories specified in @podpath for directories,
 612 #  .pod files, and .pm files.  it also scans the pod files specified in
 613 #  @libpods for =item directives.
 614 #
 615 sub scan_podpath {
 616     my($podroot, $recurse, $append) = @_;
 617     my($pwd, $dir);
 618     my($libpod, $dirname, $pod, @files, @poddata);
 619
 620     unless($append) {
 621         %items = ();
 622         %pages = ();
 623     }
 624
 625     # scan each directory listed in @podpath
 626     $pwd = getcwd();
 627     chdir($podroot)
 628         || die "$0: error changing to directory $podroot: $!\n";
 629     foreach $dir (@podpath) {
 630         scan_dir($dir, $recurse);
 631     }
 632
 633     # scan the pods listed in @libpods for =item directives
 634     foreach $libpod (@libpods) {
 635         # if the page isn't defined then we won't know where to find it
 636         # on the system.
 637         next unless defined $pages{$libpod} && $pages{$libpod};
 638
 639         # if there is a directory then use the .pod and .pm files within it.
 640         if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 641             #  find all the .pod and .pm files within the directory
 642             $dirname = $1;
 643             opendir(DIR, $dirname) ||
 644                 die "$0: error opening directory $dirname: $!\n";
 645             @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
 646             closedir(DIR);
 647
 648             # scan each .pod and .pm file for =item directives
 649             foreach $pod (@files) {
 650                 open(POD, "<$dirname/$pod") ||
 651                     die "$0: error opening $dirname/$pod for input: $!\n";
 652                 @poddata = <POD>;
 653                 close(POD);
 654
 655                 scan_items("$dirname/$pod", @poddata);
 656             }
 657
 658             # use the names of files as =item directives too.
 659             foreach $pod (@files) {
 660                 $pod =~ /^(.*)(\.pod|\.pm)$/;
 661                 $items{$1} = "$dirname/$1.html" if $1;
 662             }
 663         } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
 664                  $pages{$libpod} =~ /([^:]*\.pm):/) {
 665             # scan the .pod or .pm file for =item directives
 666             $pod = $1;
 667             open(POD, "<$pod") ||
 668                 die "$0: error opening $pod for input: $!\n";
 669             @poddata = <POD>;
 670             close(POD);
 671
 672             scan_items("$pod", @poddata);
 673         } else {
 674             warn "$0: shouldn't be here (line ".__LINE__."\n";
 675         }
 676     }
 677     @poddata = ();      # clean-up a bit
 678
 679     chdir($pwd)
 680         || die "$0: error changing to directory $pwd: $!\n";
 681
 682     # cache the item list for later use
 683     warn "caching items for later use\n" if $verbose;
 684     open(CACHE, ">$itemcache") ||
 685         die "$0: error open $itemcache for writing: $!\n";
 686
 687     print CACHE join(":", @podpath) . "\n$podroot\n";
 688     foreach my $key (keys %items) {
 689         print CACHE "$key $items{$key}\n";
 690     }
 691
 692     close(CACHE);
 693
 694     # cache the directory list for later use
 695     warn "caching directories for later use\n" if $verbose;
 696     open(CACHE, ">$dircache") ||
 697         die "$0: error open $dircache for writing: $!\n";
 698
 699     print CACHE join(":", @podpath) . "\n$podroot\n";
 700     foreach my $key (keys %pages) {
 701         print CACHE "$key $pages{$key}\n";
 702     }
 703
 704     close(CACHE);
 705 }
 706
 707 #
 708 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 709 #  files, and .pm files.  notes those that it finds.  this information will
 710 #  be used later in order to figure out where the pages specified in L<>
 711 #  links are on the filesystem.
 712 #
 713 sub scan_dir {
 714     my($dir, $recurse) = @_;
 715     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 716     local $_;
 717
 718     @subdirs = ();
 719     @pods = ();
 720
 721     opendir(DIR, $dir) ||
 722         die "$0: error opening directory $dir: $!\n";
 723     while (defined($_ = readdir(DIR))) {
 724         if (-d "$dir/$_" && $_ ne "." && $_ ne "..") {      # directory
 725             $pages{$_}  = "" unless defined $pages{$_};
 726             $pages{$_} .= "$dir/$_:";
 727             push(@subdirs, $_);
 728         } elsif (/\.pod$/) {                                # .pod
 729             s/\.pod$//;
 730             $pages{$_}  = "" unless defined $pages{$_};
 731             $pages{$_} .= "$dir/$_.pod:";
 732             push(@pods, "$dir/$_.pod");
 733         } elsif (/\.pm$/) {                                 # .pm
 734             s/\.pm$//;
 735             $pages{$_}  = "" unless defined $pages{$_};
 736             $pages{$_} .= "$dir/$_.pm:";
 737             push(@pods, "$dir/$_.pm");
 738         }
 739     }
 740     closedir(DIR);
 741
 742     # recurse on the subdirectories if necessary
 743     if ($recurse) {
 744         foreach my $subdir (@subdirs) {
 745             scan_dir("$dir/$subdir", $recurse);
 746         }
 747     }
 748 }
 749
 750 #
 751 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
 752 #  build an index.
 753 #
 754 sub scan_headings {
 755     my($sections, @data) = @_;
 756     my($tag, $which_head, $title, $listdepth, $index);
 757
 758     # here we need      local $ignore = 0;
 759     #  unfortunately, we can't have it, because $ignore is lexical
 760     $ignore = 0;
 761
 762     $listdepth = 0;
 763     $index = "";
 764
 765     # scan for =head directives, note their name, and build an index
 766     #  pointing to each of them.
 767     foreach my $line (@data) {
 768         if ($line =~ /^=(head)([1-6])\s+(.*)/) {
 769             ($tag,$which_head, $title) = ($1,$2,$3);
 770             chomp($title);
 771             $$sections{htmlify(0,$title)} = 1;
 772
 773             if ($which_head > $listdepth) {
 774                 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
 775             } elsif ($which_head < $listdepth) {
 776                 $listdepth--;
 777                 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 778             }
 779             $listdepth = $which_head;
 780
 781             $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
 782                       "<A HREF=\"#" . htmlify(0,$title) . "\">" .
 783                       process_text(\$title, 0) . "</A>";
 784         }
 785     }
 786
 787     # finish off the lists
 788     while ($listdepth--) {
 789         $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 790     }
 791
 792     # get rid of bogus lists
 793     $index =~ s,\t*<UL>\s*</UL>\n,,g;
 794
 795     $ignore = 1;        # restore old value;
 796
 797     return $index;
 798 }
 799
 800 #
 801 # scan_items - scans the pod specified by $pod for =item directives.  we
 802 #  will use this information later on in resolving C<> links.
 803 #
 804 sub scan_items {
 805     my($pod, @poddata) = @_;
 806     my($i, $item);
 807     local $_;
 808
 809     $pod =~ s/\.pod$//;
 810     $pod .= ".html" if $pod;
 811
 812     foreach $i (0..$#poddata) {
 813         $_ = $poddata[$i];
 814
 815         # remove any formatting instructions
 816         s,[A-Z]<([^<>]*)>,$1,g;
 817
 818         # figure out what kind of item it is and get the first word of
 819         #  it's name.
 820         if (/^=item\s+(\w*)\s*.*$/s) {
 821             if ($1 eq "*") {            # bullet list
 822                 /\A=item\s+\*\s*(.*?)\s*\Z/s;
 823                 $item = $1;
 824             } elsif ($1 =~ /^[0-9]+/) { # numbered list
 825                 /\A=item\s+[0-9]+\.?(.*?)\s*\Z/s;
 826                 $item = $1;
 827             } else {
 828 #               /\A=item\s+(.*?)\s*\Z/s;
 829                 /\A=item\s+(\w*)/s;
 830                 $item = $1;
 831             }
 832
 833             $items{$item} = "$pod" if $item;
 834         }
 835     }
 836 }
 837
 838 #
 839 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
 840 #
 841 sub process_head {
 842     my($tag, $heading) = @_;
 843     my $firstword;
 844
 845     # figure out the level of the =head
 846     $tag =~ /head([1-6])/;
 847     my $level = $1;
 848
 849     # can't have a heading full of spaces and speechmarks and so on
 850     $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
 851
 852     print HTML "<P>\n" unless $listlevel;
 853     print HTML "<HR>\n" unless $listlevel || $top;
 854     print HTML "<H$level>"; # unless $listlevel;
 855     #print HTML "<H$level>" unless $listlevel;
 856     my $convert = $heading; process_text(\$convert, 0);
 857     print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
 858     print HTML "</H$level>"; # unless $listlevel;
 859     print HTML "\n";
 860 }
 861
 862 #
 863 # process_item - convert a pod item tag and convert it to HTML format.
 864 #
 865 sub process_item {
 866     my $text = $_[0];
 867     my($i, $quote, $name);
 868
 869     my $need_preamble = 0;
 870     my $this_entry;
 871
 872
 873     # lots of documents start a list without doing an =over.  this is
 874     # bad!  but, the proper thing to do seems to be to just assume
 875     # they did do an =over.  so warn them once and then continue.
 876     warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n"
 877         unless $listlevel;
 878     process_over() unless $listlevel;
 879
 880     return unless $listlevel;
 881
 882     # remove formatting instructions from the text
 883     1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
 884     pre_escape(\$text);
 885
 886     $need_preamble = $items_seen[$listlevel]++ == 0;
 887
 888     # check if this is the first =item after an =over
 889     $i = $listlevel - 1;
 890     my $need_new = $listlevel >= @listitem;
 891
 892     if ($text =~ /\A\*/) {              # bullet
 893
 894         if ($need_preamble) {
 895             push(@listend,  "</UL>");
 896             print HTML "<UL>\n";
 897         }
 898
 899        print HTML "<LI><STRONG>";
 900        $text =~ /\A\*\s*(.*)\Z/s;
 901        print HTML "<A NAME=\"item_" . htmlify(1,$1) . "\">" if $1 && !$items_named{$1}++;
 902        $quote = 1;
 903        #print HTML process_puretext($1, \$quote);
 904        print HTML $1;
 905        print HTML "</A>" if $1;
 906        print HTML "</STRONG>";
 907
 908     } elsif ($text =~ /\A[0-9#]+/) {    # numbered list
 909
 910         if ($need_preamble) {
 911             push(@listend,  "</OL>");
 912             print HTML "<OL>\n";
 913         }
 914
 915        print HTML "<LI><STRONG>";
 916        $text =~ /\A[0-9]+\.?(.*)\Z/s;
 917        print HTML "<A NAME=\"item_" . htmlify(0,$1) . "\">" if $1;
 918        $quote = 1;
 919        #print HTML process_puretext($1, \$quote);
 920        print HTML $1 if $1;
 921        print HTML "</A>" if $1;
 922        print HTML "</STRONG>";
 923
 924     } else {                    # all others
 925
 926         if ($need_preamble) {
 927             push(@listend,  '</DL>');
 928             print HTML "<DL>\n";
 929         }
 930
 931        print HTML "<DT><STRONG>";
 932        print HTML "<A NAME=\"item_" . htmlify(1,$text) . "\">"
 933             if $text && !$items_named{($text =~ /(\S+)/)[0]}++;
 934             # preceding craziness so that the duplicate leading bits in
 935             # perlfunc work to find just the first one.  otherwise
 936             # open etc would have many names
 937        $quote = 1;
 938        #print HTML process_puretext($text, \$quote);
 939        print HTML $text;
 940        print HTML "</A>" if $text;
 941        print HTML "</STRONG>";
 942
 943        print HTML '<DD>';
 944     }
 945
 946     print HTML "\n";
 947 }
 948
 949 #
 950 # process_over - process a pod over tag and start a corresponding HTML
 951 # list.
 952 #
 953 sub process_over {
 954     # start a new list
 955     $listlevel++;
 956 }
 957
 958 #
 959 # process_back - process a pod back tag and convert it to HTML format.
 960 #
 961 sub process_back {
 962     warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n"
 963         unless $listlevel;
 964     return unless $listlevel;
 965
 966     # close off the list.  note, I check to see if $listend[$listlevel] is
 967     # defined because an =item directive may have never appeared and thus
 968     # $listend[$listlevel] may have never been initialized.
 969     $listlevel--;
 970     print HTML $listend[$listlevel] if defined $listend[$listlevel];
 971     print HTML "\n";
 972
 973     # don't need the corresponding perl code anymore
 974     pop(@listitem);
 975     pop(@listdata);
 976     pop(@listend);
 977
 978     pop(@items_seen);
 979 }
 980
 981 #
 982 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
 983 #
 984 sub process_cut {
 985     $ignore = 1;
 986 }
 987
 988 #
 989 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
 990 # corresponding cut.
 991 #
 992 sub process_pod {
 993     # no need to set $ignore to 0 cause the main loop did it
 994 }
 995
 996 #
 997 # process_for - process a =for pod tag.  if it's for html, split
 998 # it out verbatim, otherwise ignore it.
 999 #
1000 sub process_for {
1001     my($whom, $text) = @_;
1002     if ( $whom =~ /^(pod2)?html$/i) {
1003         print HTML $text;
1004     }
1005 }
1006
1007 #
1008 # process_begin - process a =begin pod tag.  this pushes
1009 # whom we're beginning on the begin stack.  if there's a
1010 # begin stack, we only print if it us.
1011 #
1012 sub process_begin {
1013     my($whom, $text) = @_;
1014     $whom = lc($whom);
1015     push (@begin_stack, $whom);
1016     if ( $whom =~ /^(pod2)?html$/) {
1017         print HTML $text if $text;
1018     }
1019 }
1020
1021 #
1022 # process_end - process a =end pod tag.  pop the
1023 # begin stack.  die if we're mismatched.
1024 #
1025 sub process_end {
1026     my($whom, $text) = @_;
1027     $whom = lc($whom);
1028     if ($begin_stack[-1] ne $whom ) {
1029         die "Unmatched begin/end at chunk $paragraph\n"
1030     }
1031     pop @begin_stack;
1032 }
1033
1034 #
1035 # process_text - handles plaintext that appears in the input pod file.
1036 # there may be pod commands embedded within the text so those must be
1037 # converted to html commands.
1038 #
1039 sub process_text {
1040     my($text, $escapeQuotes) = @_;
1041     my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1042     my($podcommand, $params, $tag, $quote);
1043
1044     return if $ignore;
1045
1046     $quote  = 0;                # status of double-quote conversion
1047     $result = "";
1048     $rest = $$text;
1049
1050     if ($rest =~ /^\s+/) {      # preformatted text, no pod directives
1051         $rest =~ s/\n+\Z//;
1052         $rest =~ s#.*#
1053             my $line = $&;
1054             1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1055             $line;
1056         #eg;
1057
1058         $rest   =~ s/&/&amp;/g;
1059         $rest   =~ s/</&lt;/g;
1060         $rest   =~ s/>/&gt;/g;
1061         $rest   =~ s/"/&quot;/g;
1062
1063         # try and create links for all occurrences of perl.* within
1064         # the preformatted text.
1065         $rest =~ s{
1066                     (\s*)(perl\w+)
1067                   }{
1068                     if (defined $pages{$2}) {   # is a link
1069                         qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1070                     } elsif (defined $pages{dosify($2)}) {      # is a link
1071                         qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1072                     } else {
1073                         "$1$2";
1074                     }
1075                   }xeg;
1076         $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1077
1078   my $urls = '(' . join ('|', qw{
1079                 http
1080                 telnet
1081                 mailto
1082                 news
1083                 gopher
1084                 file
1085                 wais
1086                 ftp
1087             } )
1088         . ')';
1089
1090   my $ltrs = '\w';
1091   my $gunk = '/#~:.?+=&%@!\-';
1092   my $punc = '.:?\-';
1093   my $any  = "${ltrs}${gunk}${punc}";
1094
1095   $rest =~ s{
1096         \b                          # start at word boundary
1097         (                           # begin $1  {
1098           $urls     :               # need resource and a colon
1099           [$any] +?                 # followed by on or more
1100                                     #  of any valid character, but
1101                                     #  be conservative and take only
1102                                     #  what you need to....
1103         )                           # end   $1  }
1104         (?=                         # look-ahead non-consumptive assertion
1105                 [$punc]*            # either 0 or more puntuation
1106                 [^$any]             #   followed by a non-url char
1107             |                       # or else
1108                 $                   #   then end of the string
1109         )
1110       }{<A HREF="$1">$1</A>}igox;
1111
1112         $result =   "<PRE>"     # text should be as it is (verbatim)
1113                   . "$rest\n"
1114                   . "</PRE>\n";
1115     } else {                    # formatted text
1116         # parse through the string, stopping each time we find a
1117         # pod-escape.  once the string has been throughly processed
1118         # we can output it.
1119         while (length $rest) {
1120             # check to see if there are any possible pod directives in
1121             # the remaining part of the text.
1122             if ($rest =~ m/[BCEIFLSZ]</) {
1123                 warn "\$rest\t= $rest\n" unless
1124                     $rest =~ /\A
1125                            ([^<]*?)
1126                            ([BCEIFLSZ]?)
1127                            <
1128                            (.*)\Z/xs;
1129
1130                 $s1 = $1;       # pure text
1131                 $s2 = $2;       # the type of pod-escape that follows
1132                 $s3 = '<';      # '<'
1133                 $s4 = $3;       # the rest of the string
1134             } else {
1135                 $s1 = $rest;
1136                 $s2 = "";
1137                 $s3 = "";
1138                 $s4 = "";
1139             }
1140
1141             if ($s3 eq '<' && $s2) {    # a pod-escape
1142                 $result    .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1143                 $podcommand = "$s2<";
1144                 $rest       = $s4;
1145
1146                 # find the matching '>'
1147                 $match = 1;
1148                 $bf = 0;
1149                 while ($match && !$bf) {
1150                     $bf = 1;
1151                     if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1152                         $bf = 0;
1153                         $match++;
1154                         $podcommand .= $1;
1155                         $rest        = $2;
1156                     } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1157                         $bf = 0;
1158                         $match--;
1159                         $podcommand .= $1;
1160                         $rest        = $2;
1161                     }
1162                 }
1163
1164                 if ($match != 0) {
1165                     warn <<WARN;
1166 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1167 WARN
1168                     $result .= substr $podcommand, 0, 2;
1169                     $rest = substr($podcommand, 2) . $rest;
1170                     next;
1171                 }
1172
1173                 # pull out the parameters to the pod-escape
1174                 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1175                 $tag    = $1;
1176                 $params = $2;
1177
1178                 # process the text within the pod-escape so that any escapes
1179                 # which must occur do.
1180                 process_text(\$params, 0) unless $tag eq 'L';
1181
1182                 $s1 = $params;
1183                 if (!$tag || $tag eq " ") {     #  <> : no tag
1184                     $s1 = "&lt;$params&gt;";
1185                 } elsif ($tag eq "L") {         # L<> : link
1186                     $s1 = process_L($params);
1187                 } elsif ($tag eq "I" ||         # I<> : italicize text
1188                          $tag eq "B" ||         # B<> : bold text
1189                          $tag eq "F") {         # F<> : file specification
1190                     $s1 = process_BFI($tag, $params);
1191                 } elsif ($tag eq "C") {         # C<> : literal code
1192                     $s1 = process_C($params, 1);
1193                 } elsif ($tag eq "E") {         # E<> : escape
1194                     $s1 = process_E($params);
1195                 } elsif ($tag eq "Z") {         # Z<> : zero-width character
1196                     $s1 = process_Z($params);
1197                 } elsif ($tag eq "S") {         # S<> : non-breaking space
1198                     $s1 = process_S($params);
1199                 } elsif ($tag eq "X") {         # S<> : non-breaking space
1200                     $s1 = process_X($params);
1201                 } else {
1202                     warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1203                 }
1204
1205                 $result .= "$s1";
1206             } else {
1207                 # for pure text we must deal with implicit links and
1208                 # double-quotes among other things.
1209                 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1210                 $rest    = $s4;
1211             }
1212         }
1213     }
1214     $$text = $result;
1215 }
1216
1217 sub html_escape {
1218     my $rest = $_[0];
1219     $rest   =~ s/&/&amp;/g;
1220     $rest   =~ s/</&lt;/g;
1221     $rest   =~ s/>/&gt;/g;
1222     $rest   =~ s/"/&quot;/g;
1223     return $rest;
1224 }
1225
1226 #
1227 # process_puretext - process pure text (without pod-escapes) converting
1228 #  double-quotes and handling implicit C<> links.
1229 #
1230 sub process_puretext {
1231     my($text, $quote) = @_;
1232     my(@words, $result, $rest, $lead, $trail);
1233
1234     # convert double-quotes to single-quotes
1235     $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1236     while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1237
1238     $$quote = ($text =~ m/"/ ? 1 : 0);
1239     $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1240
1241     # keep track of leading and trailing white-space
1242     $lead  = ($text =~ /\A(\s*)/s ? $1 : "");
1243     $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1244
1245     # collapse all white space into a single space
1246     $text =~ s/\s+/ /g;
1247     @words = split(" ", $text);
1248
1249     # process each word individually
1250     foreach my $word (@words) {
1251         # see if we can infer a link
1252         if ($word =~ /^\w+\(/) {
1253             # has parenthesis so should have been a C<> ref
1254             $word = process_C($word);
1255 #           $word =~ /^[^()]*]\(/;
1256 #           if (defined $items{$1} && $items{$1}) {
1257 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1258 #                       . htmlify(0,$word)
1259 #                       . "\">$word</A></CODE>";
1260 #           } elsif (defined $items{$word} && $items{$word}) {
1261 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1262 #                       . htmlify(0,$word)
1263 #                       . "\">$word</A></CODE>";
1264 #           } else {
1265 #               $word =   "\n<CODE><A HREF=\"#item_"
1266 #                       . htmlify(0,$word)
1267 #                       . "\">$word</A></CODE>";
1268 #           }
1269         } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1270             # perl variables, should be a C<> ref
1271             $word = process_C($word, 1);
1272         } elsif ($word =~ m,^\w+://\w,) {
1273             # looks like a URL
1274             $word = qq(<A HREF="$word">$word</A>);
1275         } elsif ($word =~ /[\w.-]+\@\w+\.\w/) {
1276             # looks like an e-mail address
1277             $word = qq(<A HREF="MAILTO:$word">$word</A>);
1278         } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) {  # all uppercase?
1279             $word = html_escape($word) if $word =~ /[&<>]/;
1280             $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1281         } else {
1282             $word = html_escape($word) if $word =~ /[&<>]/;
1283         }
1284     }
1285
1286     # build a new string based upon our conversion
1287     $result = "";
1288     $rest   = join(" ", @words);
1289     while (length($rest) > 75) {
1290         if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1291              $rest =~ m/^(\S*)\s(.*?)$/o) {
1292
1293             $result .= "$1\n";
1294             $rest    = $2;
1295         } else {
1296             $result .= "$rest\n";
1297             $rest    = "";
1298         }
1299     }
1300     $result .= $rest if $rest;
1301
1302     # restore the leading and trailing white-space
1303     $result = "$lead$result$trail";
1304
1305     return $result;
1306 }
1307
1308 #
1309 # pre_escape - convert & in text to $amp;
1310 #
1311 sub pre_escape {
1312     my($str) = @_;
1313
1314     $$str =~ s,&,&amp;,g;
1315 }
1316
1317 #
1318 # dosify - convert filenames to 8.3
1319 #
1320 sub dosify {
1321     my($str) = @_;
1322     if ($Is83) {
1323         $str = lc $str;
1324         $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1325         $str =~ s/(\w+)/substr ($1,0,8)/ge;
1326     }
1327     return $str;
1328 }
1329
1330 #
1331 # process_L - convert a pod L<> directive to a corresponding HTML link.
1332 #  most of the links made are inferred rather than known about directly
1333 #  (i.e it's not known whether the =head\d section exists in the target file,
1334 #   or whether a .pod file exists in the case of split files).  however, the
1335 #  guessing usually works.
1336 #
1337 # Unlike the other directives, this should be called with an unprocessed
1338 # string, else tags in the link won't be matched.
1339 #
1340 sub process_L {
1341     my($str) = @_;
1342     my($s1, $s2, $linktext, $page, $page83, $section, $link);   # work strings
1343
1344     $str =~ s/\n/ /g;                   # undo word-wrapped tags
1345     $s1 = $str;
1346     for ($s1) {
1347         # LREF: a la HREF L<show this text|man/section>
1348         $linktext = $1 if s:^([^|]+)\|::;
1349
1350         # a :: acts like a /
1351         s,::,/,;
1352
1353         # make sure sections start with a /
1354         s,^",/",g;
1355         s,^,/,g if (!m,/, && / /);
1356
1357         # check if there's a section specified
1358         if (m,^(.*?)/"?(.*?)"?$,) {     # yes
1359             ($page, $section) = ($1, $2);
1360         } else {                        # no
1361             ($page, $section) = ($str, "");
1362         }
1363
1364         # check if we know that this is a section in this page
1365         if (!defined $pages{$page} && defined $sections{$page}) {
1366             $section = $page;
1367             $page = "";
1368         }
1369     }
1370
1371     $page83=dosify($page);
1372     $page=$page83 if (defined $pages{$page83});
1373     if ($page eq "") {
1374         $link = "#" . htmlify(0,$section);
1375         $linktext = $section unless defined($linktext);
1376     } elsif (!defined $pages{$page}) {
1377         warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1378         $link = "";
1379         $linktext = $page unless defined($linktext);
1380     } else {
1381         $linktext  = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1382         $section = htmlify(0,$section) if $section ne "";
1383
1384         # if there is a directory by the name of the page, then assume that an
1385         # appropriate section will exist in the subdirectory
1386         if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1387             $link = "$htmlroot/$1/$section.html";
1388
1389         # since there is no directory by the name of the page, the section will
1390         # have to exist within a .html of the same name.  thus, make sure there
1391         # is a .pod or .pm that might become that .html
1392         } else {
1393             $section = "#$section";
1394             # check if there is a .pod with the page name
1395             if ($pages{$page} =~ /([^:]*)\.pod:/) {
1396                 $link = "$htmlroot/$1.html$section";
1397             } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1398                 $link = "$htmlroot/$1.html$section";
1399             } else {
1400                 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1401                              "no .pod or .pm found\n";
1402                 $link = "";
1403                 $linktext = $section unless defined($linktext);
1404             }
1405         }
1406     }
1407
1408     process_text(\$linktext, 0);
1409     if ($link) {
1410         $s1 = "<A HREF=\"$link\">$linktext</A>";
1411     } else {
1412         $s1 = "<EM>$linktext</EM>";
1413     }
1414     return $s1;
1415 }
1416
1417 #
1418 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1419 # convert them to corresponding HTML directives.
1420 #
1421 sub process_BFI {
1422     my($tag, $str) = @_;
1423     my($s1);                    # work string
1424     my(%repltext) = (   'B' => 'STRONG',
1425                         'F' => 'EM',
1426                         'I' => 'EM');
1427
1428     # extract the modified text and convert to HTML
1429     $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1430     return $s1;
1431 }
1432
1433 #
1434 # process_C - process the C<> pod-escape.
1435 #
1436 sub process_C {
1437     my($str, $doref) = @_;
1438     my($s1, $s2);
1439
1440     $s1 = $str;
1441     $s1 =~ s/\([^()]*\)//g;     # delete parentheses
1442     $s2 = $s1;
1443     $s1 =~ s/\W//g;             # delete bogus characters
1444
1445     # if there was a pod file that we found earlier with an appropriate
1446     # =item directive, then create a link to that page.
1447     if ($doref && defined $items{$s1}) {
1448         $s1 = ($items{$s1} ?
1449                "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) .  "\">$str</A>" :
1450                "<A HREF=\"#item_" . htmlify(0,$s2) .  "\">$str</A>");
1451         $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1452         confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1453     } else {
1454         $s1 = "<CODE>$str</CODE>";
1455         # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1456     }
1457
1458
1459     return $s1;
1460 }
1461
1462 #
1463 # process_E - process the E<> pod directive which seems to escape a character.
1464 #
1465 sub process_E {
1466     my($str) = @_;
1467
1468     for ($str) {
1469         s,([^/].*),\&$1\;,g;
1470     }
1471
1472     return $str;
1473 }
1474
1475 #
1476 # process_Z - process the Z<> pod directive which really just amounts to
1477 # ignoring it.  this allows someone to start a paragraph with an =
1478 #
1479 sub process_Z {
1480     my($str) = @_;
1481
1482     # there is no equivalent in HTML for this so just ignore it.
1483     $str = "";
1484     return $str;
1485 }
1486
1487 #
1488 # process_S - process the S<> pod directive which means to convert all
1489 # spaces in the string to non-breaking spaces (in HTML-eze).
1490 #
1491 sub process_S {
1492     my($str) = @_;
1493
1494     # convert all spaces in the text to non-breaking spaces in HTML.
1495     $str =~ s/ /&nbsp;/g;
1496     return $str;
1497 }
1498
1499 #
1500 # process_X - this is supposed to make an index entry.  we'll just
1501 # ignore it.
1502 #
1503 sub process_X {
1504     return '';
1505 }
1506
1507
1508 #
1509 # finish_list - finish off any pending HTML lists.  this should be called
1510 # after the entire pod file has been read and converted.
1511 #
1512 sub finish_list {
1513     while ($listlevel >= 0) {
1514         print HTML "</DL>\n";
1515         $listlevel--;
1516     }
1517 }
1518
1519 #
1520 # htmlify - converts a pod section specification to a suitable section
1521 # specification for HTML.  if first arg is 1, only takes 1st word.
1522 #
1523 sub htmlify {
1524     my($compact, $heading) = @_;
1525
1526     if ($compact) {
1527       $heading =~ /^(\w+)/;
1528       $heading = $1;
1529     }
1530
1531   # $heading = lc($heading);
1532   $heading =~ s/[^\w\s]/_/g;
1533   $heading =~ s/(\s+)/ /g;
1534   $heading =~ s/^\s*(.*?)\s*$/$1/s;
1535   $heading =~ s/ /_/g;
1536   $heading =~ s/\A(.{32}).*\Z/$1/s;
1537   $heading =~ s/\s+\Z//;
1538   $heading =~ s/_{2,}/_/g;
1539
1540   return $heading;
1541 }
1542
1543 BEGIN {
1544 }
1545
1546 1;
1547