lib/Pod/Html.pm

   1 package Pod::Html;
   2
   3 use Pod::Functions;
   4 use Getopt::Long;       # package for handling command-line parameters
   5 require Exporter;
   6 use vars qw($VERSION);
   7 $VERSION = 1.01;
   8 @ISA = Exporter;
   9 @EXPORT = qw(pod2html htmlify);
  10 use Cwd;
  11
  12 use Carp;
  13
  14 use strict;
  15
  16 use Config;
  17
  18 =head1 NAME
  19
  20 Pod::Html - module to convert pod files to HTML
  21
  22 =head1 SYNOPSIS
  23
  24     use Pod::Html;
  25     pod2html([options]);
  26
  27 =head1 DESCRIPTION
  28
  29 Converts files from pod format (see L<perlpod>) to HTML format.  It
  30 can automatically generate indexes and cross-references, and it keeps
  31 a cache of things it knows how to cross-reference.
  32
  33 =head1 ARGUMENTS
  34
  35 Pod::Html takes the following arguments:
  36
  37 =over 4
  38
  39 =item help
  40
  41     --help
  42
  43 Displays the usage message.
  44
  45 =item htmlroot
  46
  47     --htmlroot=name
  48
  49 Sets the base URL for the HTML files.  When cross-references are made,
  50 the HTML root is prepended to the URL.
  51
  52 =item infile
  53
  54     --infile=name
  55
  56 Specify the pod file to convert.  Input is taken from STDIN if no
  57 infile is specified.
  58
  59 =item outfile
  60
  61     --outfile=name
  62
  63 Specify the HTML file to create.  Output goes to STDOUT if no outfile
  64 is specified.
  65
  66 =item podroot
  67
  68     --podroot=name
  69
  70 Specify the base directory for finding library pods.
  71
  72 =item podpath
  73
  74     --podpath=name:...:name
  75
  76 Specify which subdirectories of the podroot contain pod files whose
  77 HTML converted forms can be linked-to in cross-references.
  78
  79 =item libpods
  80
  81     --libpods=name:...:name
  82
  83 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
  84
  85 =item netscape
  86
  87     --netscape
  88
  89 Use Netscape HTML directives when applicable.
  90
  91 =item nonetscape
  92
  93     --nonetscape
  94
  95 Do not use Netscape HTML directives (default).
  96
  97 =item index
  98
  99     --index
 100
 101 Generate an index at the top of the HTML file (default behaviour).
 102
 103 =item noindex
 104
 105     --noindex
 106
 107 Do not generate an index at the top of the HTML file.
 108
 109
 110 =item recurse
 111
 112     --recurse
 113
 114 Recurse into subdirectories specified in podpath (default behaviour).
 115
 116 =item norecurse
 117
 118     --norecurse
 119
 120 Do not recurse into subdirectories specified in podpath.
 121
 122 =item title
 123
 124     --title=title
 125
 126 Specify the title of the resulting HTML file.
 127
 128 =item verbose
 129
 130     --verbose
 131
 132 Display progress messages.
 133
 134 =back
 135
 136 =head1 EXAMPLE
 137
 138     pod2html("pod2html",
 139              "--podpath=lib:ext:pod:vms",
 140              "--podroot=/usr/src/perl",
 141              "--htmlroot=/perl/nmanual",
 142              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
 143              "--recurse",
 144              "--infile=foo.pod",
 145              "--outfile=/perl/nmanual/foo.html");
 146
 147 =head1 AUTHOR
 148
 149 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 150
 151 =head1 BUGS
 152
 153 Has trouble with C<> etc in = commands.
 154
 155 =head1 SEE ALSO
 156
 157 L<perlpod>
 158
 159 =head1 COPYRIGHT
 160
 161 This program is distributed under the Artistic License.
 162
 163 =cut
 164
 165 my $dircache = "pod2html-dircache";
 166 my $itemcache = "pod2html-itemcache";
 167
 168 my @begin_stack = ();           # begin/end stack
 169
 170 my @libpods = ();               # files to search for links from C<> directives
 171 my $htmlroot = "/";             # http-server base directory from which all
 172                                 #   relative paths in $podpath stem.
 173 my $htmlfile = "";              # write to stdout by default
 174 my $podfile = "";               # read from stdin by default
 175 my @podpath = ();               # list of directories containing library pods.
 176 my $podroot = ".";              # filesystem base directory from which all
 177                                 #   relative paths in $podpath stem.
 178 my $recurse = 1;                # recurse on subdirectories in $podpath.
 179 my $verbose = 0;                # not verbose by default
 180 my $doindex = 1;                # non-zero if we should generate an index
 181 my $listlevel = 0;              # current list depth
 182 my @listitem = ();              # stack of HTML commands to use when a =item is
 183                                 #   encountered.  the top of the stack is the
 184                                 #   current list.
 185 my @listdata = ();              # similar to @listitem, but for the text after
 186                                 #   an =item
 187 my @listend = ();               # similar to @listitem, but the text to use to
 188                                 #   end the list.
 189 my $ignore = 1;                 # whether or not to format text.  we don't
 190                                 #   format text until we hit our first pod
 191                                 #   directive.
 192
 193 my %items_named = ();           # for the multiples of the same item in perlfunc
 194 my @items_seen = ();
 195 my $netscape = 0;               # whether or not to use netscape directives.
 196 my $title;                      # title to give the pod(s)
 197 my $top = 1;                    # true if we are at the top of the doc.  used
 198                                 #   to prevent the first <HR> directive.
 199 my $paragraph;                  # which paragraph we're processing (used
 200                                 #   for error messages)
 201 my %pages = ();                 # associative array used to find the location
 202                                 #   of pages referenced by L<> links.
 203 my %sections = ();              # sections within this page
 204 my %items = ();                 # associative array used to find the location
 205                                 #   of =item directives referenced by C<> links
 206 my $Is83;                       # is dos with short filenames (8.3)
 207
 208 sub init_globals {
 209 $dircache = "pod2html-dircache";
 210 $itemcache = "pod2html-itemcache";
 211
 212 @begin_stack = ();              # begin/end stack
 213
 214 @libpods = ();          # files to search for links from C<> directives
 215 $htmlroot = "/";                # http-server base directory from which all
 216                                 #   relative paths in $podpath stem.
 217 $htmlfile = "";         # write to stdout by default
 218 $podfile = "";          # read from stdin by default
 219 @podpath = ();          # list of directories containing library pods.
 220 $podroot = ".";         # filesystem base directory from which all
 221                                 #   relative paths in $podpath stem.
 222 $recurse = 1;           # recurse on subdirectories in $podpath.
 223 $verbose = 0;           # not verbose by default
 224 $doindex = 1;                   # non-zero if we should generate an index
 225 $listlevel = 0;         # current list depth
 226 @listitem = ();         # stack of HTML commands to use when a =item is
 227                                 #   encountered.  the top of the stack is the
 228                                 #   current list.
 229 @listdata = ();         # similar to @listitem, but for the text after
 230                                 #   an =item
 231 @listend = ();          # similar to @listitem, but the text to use to
 232                                 #   end the list.
 233 $ignore = 1;                    # whether or not to format text.  we don't
 234                                 #   format text until we hit our first pod
 235                                 #   directive.
 236
 237 @items_seen = ();
 238 %items_named = ();
 239 $netscape = 0;          # whether or not to use netscape directives.
 240 $title = '';                    # title to give the pod(s)
 241 $top = 1;                       # true if we are at the top of the doc.  used
 242                                 #   to prevent the first <HR> directive.
 243 $paragraph = '';                        # which paragraph we're processing (used
 244                                 #   for error messages)
 245 %sections = ();         # sections within this page
 246
 247 # These are not reinitialised here but are kept as a cache.
 248 # See get_cache and related cache management code.
 249 #%pages = ();                   # associative array used to find the location
 250                                 #   of pages referenced by L<> links.
 251 #%items = ();                   # associative array used to find the location
 252                                 #   of =item directives referenced by C<> links
 253 $Is83=$^O eq 'dos';
 254 }
 255
 256 sub pod2html {
 257     local(@ARGV) = @_;
 258     local($/);
 259     local $_;
 260
 261     init_globals();
 262
 263     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 264
 265     # cache of %pages and %items from last time we ran pod2html
 266
 267     #undef $opt_help if defined $opt_help;
 268
 269     # parse the command-line parameters
 270     parse_command_line();
 271
 272     # set some variables to their default values if necessary
 273     local *POD;
 274     unless (@ARGV && $ARGV[0]) {
 275         $podfile  = "-" unless $podfile;        # stdin
 276         open(POD, "<$podfile")
 277                 || die "$0: cannot open $podfile file for input: $!\n";
 278     } else {
 279         $podfile = $ARGV[0];  # XXX: might be more filenames
 280         *POD = *ARGV;
 281     }
 282     $htmlfile = "-" unless $htmlfile;   # stdout
 283     $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
 284
 285     # read the pod a paragraph at a time
 286     warn "Scanning for sections in input file(s)\n" if $verbose;
 287     $/ = "";
 288     my @poddata  = <POD>;
 289     close(POD);
 290
 291     # scan the pod for =head[1-6] directives and build an index
 292     my $index = scan_headings(\%sections, @poddata);
 293
 294     unless($index) {
 295         warn "No pod in $podfile\n" if $verbose;
 296         return;
 297     }
 298
 299     # open the output file
 300     open(HTML, ">$htmlfile")
 301             || die "$0: cannot open $htmlfile file for output: $!\n";
 302
 303     # put a title in the HTML file
 304     $title = '';
 305     TITLE_SEARCH: {
 306         for (my $i = 0; $i < @poddata; $i++) {
 307             if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 308                 for my $para ( @poddata[$i, $i+1] ) {
 309                     last TITLE_SEARCH if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
 310                 }
 311             }
 312
 313         }
 314     }
 315     if (!$title and $podfile =~ /\.pod$/) {
 316         # probably a split pod so take first =head[12] as title
 317         for (my $i = 0; $i < @poddata; $i++) {
 318             last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 319         }
 320         warn "adopted '$title' as title for $podfile\n"
 321             if $verbose and $title;
 322     }
 323     if ($title) {
 324         $title =~ s/\s*\(.*\)//;
 325     } else {
 326         warn "$0: no title for $podfile";
 327         $podfile =~ /^(.*)(\.[^.\/]+)?$/;
 328         $title = ($podfile eq "-" ? 'No Title' : $1);
 329         warn "using $title" if $verbose;
 330     }
 331     print HTML <<END_OF_HEAD;
 332 <HTML>
 333 <HEAD>
 334 <TITLE>$title</TITLE>
 335 <LINK REV="made" HREF="mailto:$Config{perladmin}">
 336 </HEAD>
 337
 338 <BODY>
 339
 340 END_OF_HEAD
 341
 342     # load/reload/validate/cache %pages and %items
 343     get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
 344
 345     # scan the pod for =item directives
 346     scan_items("", \%items, @poddata);
 347
 348     # put an index at the top of the file.  note, if $doindex is 0 we
 349     # still generate an index, but surround it with an html comment.
 350     # that way some other program can extract it if desired.
 351     $index =~ s/--+/-/g;
 352     print HTML "<!-- INDEX BEGIN -->\n";
 353     print HTML "<!--\n" unless $doindex;
 354     print HTML $index;
 355     print HTML "-->\n" unless $doindex;
 356     print HTML "<!-- INDEX END -->\n\n";
 357     print HTML "<HR>\n" if $doindex;
 358
 359     # now convert this file
 360     warn "Converting input file\n" if $verbose;
 361     foreach my $i (0..$#poddata) {
 362         $_ = $poddata[$i];
 363         $paragraph = $i+1;
 364         if (/^(=.*)/s) {        # is it a pod directive?
 365             $ignore = 0;
 366             $_ = $1;
 367             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 368                 process_begin($1, $2);
 369             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 370                 process_end($1, $2);
 371             } elsif (/^=cut/) {                 # =cut
 372                 process_cut();
 373             } elsif (/^=pod/) {                 # =pod
 374                 process_pod();
 375             } else {
 376                 next if @begin_stack && $begin_stack[-1] ne 'html';
 377
 378                 if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
 379                     process_head($1, $2);
 380                 } elsif (/^=item\s*(.*\S)/sm) { # =item text
 381                     process_item($1);
 382                 } elsif (/^=over\s*(.*)/) {             # =over N
 383                     process_over();
 384                 } elsif (/^=back/) {            # =back
 385                     process_back();
 386                 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
 387                     process_for($1,$2);
 388                 } else {
 389                     /^=(\S*)\s*/;
 390                     warn "$0: $podfile: unknown pod directive '$1' in "
 391                        . "paragraph $paragraph.  ignoring.\n";
 392                 }
 393             }
 394             $top = 0;
 395         }
 396         else {
 397             next if $ignore;
 398             next if @begin_stack && $begin_stack[-1] ne 'html';
 399             my $text = $_;
 400             process_text(\$text, 1);
 401             print HTML "<P>\n$text";
 402         }
 403     }
 404
 405     # finish off any pending directives
 406     finish_list();
 407     print HTML <<END_OF_TAIL;
 408 </BODY>
 409
 410 </HTML>
 411 END_OF_TAIL
 412
 413     # close the html file
 414     close(HTML);
 415
 416     warn "Finished\n" if $verbose;
 417 }
 418
 419 ##############################################################################
 420
 421 my $usage;                      # see below
 422 sub usage {
 423     my $podfile = shift;
 424     warn "$0: $podfile: @_\n" if @_;
 425     die $usage;
 426 }
 427
 428 $usage =<<END_OF_USAGE;
 429 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 430            --podpath=<name>:...:<name> --podroot=<name>
 431            --libpods=<name>:...:<name> --recurse --verbose --index
 432            --netscape --norecurse --noindex
 433
 434   --flush      - flushes the item and directory caches.
 435   --help       - prints this message.
 436   --htmlroot   - http-server base directory from which all relative paths
 437                  in podpath stem (default is /).
 438   --index      - generate an index at the top of the resulting html
 439                  (default).
 440   --infile     - filename for the pod to convert (input taken from stdin
 441                  by default).
 442   --libpods    - colon-separated list of pages to search for =item pod
 443                  directives in as targets of C<> and implicit links (empty
 444                  by default).  note, these are not filenames, but rather
 445                  page names like those that appear in L<> links.
 446   --netscape   - will use netscape html directives when applicable.
 447   --nonetscape - will not use netscape directives (default).
 448   --outfile    - filename for the resulting html file (output sent to
 449                  stdout by default).
 450   --podpath    - colon-separated list of directories containing library
 451                  pods.  empty by default.
 452   --podroot    - filesystem base directory from which all relative paths
 453                  in podpath stem (default is .).
 454   --noindex    - don't generate an index at the top of the resulting html.
 455   --norecurse  - don't recurse on those subdirectories listed in podpath.
 456   --recurse    - recurse on those subdirectories listed in podpath
 457                  (default behavior).
 458   --title      - title that will appear in resulting html file.
 459   --verbose    - self-explanatory
 460
 461 END_OF_USAGE
 462
 463 sub parse_command_line {
 464     my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
 465     my $result = GetOptions(
 466                             'flush'      => \$opt_flush,
 467                             'help'       => \$opt_help,
 468                             'htmlroot=s' => \$opt_htmlroot,
 469                             'index!'     => \$opt_index,
 470                             'infile=s'   => \$opt_infile,
 471                             'libpods=s'  => \$opt_libpods,
 472                             'netscape!'  => \$opt_netscape,
 473                             'outfile=s'  => \$opt_outfile,
 474                             'podpath=s'  => \$opt_podpath,
 475                             'podroot=s'  => \$opt_podroot,
 476                             'norecurse'  => \$opt_norecurse,
 477                             'recurse!'   => \$opt_recurse,
 478                             'title=s'    => \$opt_title,
 479                             'verbose'    => \$opt_verbose,
 480                            );
 481     usage("-", "invalid parameters") if not $result;
 482
 483     usage("-") if defined $opt_help;    # see if the user asked for help
 484     $opt_help = "";                     # just to make -w shut-up.
 485
 486     $podfile  = $opt_infile if defined $opt_infile;
 487     $htmlfile = $opt_outfile if defined $opt_outfile;
 488
 489     @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 490     @libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 491
 492     warn "Flushing item and directory caches\n"
 493         if $opt_verbose && defined $opt_flush;
 494     unlink($dircache, $itemcache) if defined $opt_flush;
 495
 496     $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 497     $podroot  = $opt_podroot if defined $opt_podroot;
 498
 499     $doindex  = $opt_index if defined $opt_index;
 500     $recurse  = $opt_recurse if defined $opt_recurse;
 501     $title    = $opt_title if defined $opt_title;
 502     $verbose  = defined $opt_verbose ? 1 : 0;
 503     $netscape = $opt_netscape if defined $opt_netscape;
 504 }
 505
 506
 507 my $saved_cache_key;
 508
 509 sub get_cache {
 510     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 511     my @cache_key_args = @_;
 512
 513     # A first-level cache:
 514     # Don't bother reading the cache files if they still apply
 515     # and haven't changed since we last read them.
 516
 517     my $this_cache_key = cache_key(@cache_key_args);
 518
 519     return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
 520
 521     # load the cache of %pages and %items if possible.  $tests will be
 522     # non-zero if successful.
 523     my $tests = 0;
 524     if (-f $dircache && -f $itemcache) {
 525         warn "scanning for item cache\n" if $verbose;
 526         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 527     }
 528
 529     # if we didn't succeed in loading the cache then we must (re)build
 530     #  %pages and %items.
 531     if (!$tests) {
 532         warn "scanning directories in pod-path\n" if $verbose;
 533         scan_podpath($podroot, $recurse, 0);
 534     }
 535     $saved_cache_key = cache_key(@cache_key_args);
 536 }
 537
 538 sub cache_key {
 539     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 540     return join('!', $dircache, $itemcache, $recurse,
 541                 @$podpath, $podroot, stat($dircache), stat($itemcache));
 542 }
 543
 544 #
 545 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 546 #  are valid caches of %pages and %items.  if they are valid then it loads
 547 #  them and returns a non-zero value.
 548 #
 549
 550 sub load_cache {
 551     my($dircache, $itemcache, $podpath, $podroot) = @_;
 552     my($tests);
 553     local $_;
 554
 555     $tests = 0;
 556
 557     open(CACHE, "<$itemcache") ||
 558         die "$0: error opening $itemcache for reading: $!\n";
 559     $/ = "\n";
 560
 561     # is it the same podpath?
 562     $_ = <CACHE>;
 563     chomp($_);
 564     $tests++ if (join(":", @$podpath) eq $_);
 565
 566     # is it the same podroot?
 567     $_ = <CACHE>;
 568     chomp($_);
 569     $tests++ if ($podroot eq $_);
 570
 571     # load the cache if its good
 572     if ($tests != 2) {
 573         close(CACHE);
 574         return 0;
 575     }
 576
 577     warn "loading item cache\n" if $verbose;
 578     while (<CACHE>) {
 579         /(.*?) (.*)$/;
 580         $items{$1} = $2;
 581     }
 582     close(CACHE);
 583
 584     warn "scanning for directory cache\n" if $verbose;
 585     open(CACHE, "<$dircache") ||
 586         die "$0: error opening $dircache for reading: $!\n";
 587     $/ = "\n";
 588     $tests = 0;
 589
 590     # is it the same podpath?
 591     $_ = <CACHE>;
 592     chomp($_);
 593     $tests++ if (join(":", @$podpath) eq $_);
 594
 595     # is it the same podroot?
 596     $_ = <CACHE>;
 597     chomp($_);
 598     $tests++ if ($podroot eq $_);
 599
 600     # load the cache if its good
 601     if ($tests != 2) {
 602         close(CACHE);
 603         return 0;
 604     }
 605
 606     warn "loading directory cache\n" if $verbose;
 607     while (<CACHE>) {
 608         /(.*?) (.*)$/;
 609         $pages{$1} = $2;
 610     }
 611
 612     close(CACHE);
 613
 614     return 1;
 615 }
 616
 617 #
 618 # scan_podpath - scans the directories specified in @podpath for directories,
 619 #  .pod files, and .pm files.  it also scans the pod files specified in
 620 #  @libpods for =item directives.
 621 #
 622 sub scan_podpath {
 623     my($podroot, $recurse, $append) = @_;
 624     my($pwd, $dir);
 625     my($libpod, $dirname, $pod, @files, @poddata);
 626
 627     unless($append) {
 628         %items = ();
 629         %pages = ();
 630     }
 631
 632     # scan each directory listed in @podpath
 633     $pwd = getcwd();
 634     chdir($podroot)
 635         || die "$0: error changing to directory $podroot: $!\n";
 636     foreach $dir (@podpath) {
 637         scan_dir($dir, $recurse);
 638     }
 639
 640     # scan the pods listed in @libpods for =item directives
 641     foreach $libpod (@libpods) {
 642         # if the page isn't defined then we won't know where to find it
 643         # on the system.
 644         next unless defined $pages{$libpod} && $pages{$libpod};
 645
 646         # if there is a directory then use the .pod and .pm files within it.
 647         if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 648             #  find all the .pod and .pm files within the directory
 649             $dirname = $1;
 650             opendir(DIR, $dirname) ||
 651                 die "$0: error opening directory $dirname: $!\n";
 652             @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
 653             closedir(DIR);
 654
 655             # scan each .pod and .pm file for =item directives
 656             foreach $pod (@files) {
 657                 open(POD, "<$dirname/$pod") ||
 658                     die "$0: error opening $dirname/$pod for input: $!\n";
 659                 @poddata = <POD>;
 660                 close(POD);
 661
 662                 scan_items("$dirname/$pod", @poddata);
 663             }
 664
 665             # use the names of files as =item directives too.
 666             foreach $pod (@files) {
 667                 $pod =~ /^(.*)(\.pod|\.pm)$/;
 668                 $items{$1} = "$dirname/$1.html" if $1;
 669             }
 670         } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
 671                  $pages{$libpod} =~ /([^:]*\.pm):/) {
 672             # scan the .pod or .pm file for =item directives
 673             $pod = $1;
 674             open(POD, "<$pod") ||
 675                 die "$0: error opening $pod for input: $!\n";
 676             @poddata = <POD>;
 677             close(POD);
 678
 679             scan_items("$pod", @poddata);
 680         } else {
 681             warn "$0: shouldn't be here (line ".__LINE__."\n";
 682         }
 683     }
 684     @poddata = ();      # clean-up a bit
 685
 686     chdir($pwd)
 687         || die "$0: error changing to directory $pwd: $!\n";
 688
 689     # cache the item list for later use
 690     warn "caching items for later use\n" if $verbose;
 691     open(CACHE, ">$itemcache") ||
 692         die "$0: error open $itemcache for writing: $!\n";
 693
 694     print CACHE join(":", @podpath) . "\n$podroot\n";
 695     foreach my $key (keys %items) {
 696         print CACHE "$key $items{$key}\n";
 697     }
 698
 699     close(CACHE);
 700
 701     # cache the directory list for later use
 702     warn "caching directories for later use\n" if $verbose;
 703     open(CACHE, ">$dircache") ||
 704         die "$0: error open $dircache for writing: $!\n";
 705
 706     print CACHE join(":", @podpath) . "\n$podroot\n";
 707     foreach my $key (keys %pages) {
 708         print CACHE "$key $pages{$key}\n";
 709     }
 710
 711     close(CACHE);
 712 }
 713
 714 #
 715 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 716 #  files, and .pm files.  notes those that it finds.  this information will
 717 #  be used later in order to figure out where the pages specified in L<>
 718 #  links are on the filesystem.
 719 #
 720 sub scan_dir {
 721     my($dir, $recurse) = @_;
 722     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 723     local $_;
 724
 725     @subdirs = ();
 726     @pods = ();
 727
 728     opendir(DIR, $dir) ||
 729         die "$0: error opening directory $dir: $!\n";
 730     while (defined($_ = readdir(DIR))) {
 731         if (-d "$dir/$_" && $_ ne "." && $_ ne "..") {      # directory
 732             $pages{$_}  = "" unless defined $pages{$_};
 733             $pages{$_} .= "$dir/$_:";
 734             push(@subdirs, $_);
 735         } elsif (/\.pod$/) {                                # .pod
 736             s/\.pod$//;
 737             $pages{$_}  = "" unless defined $pages{$_};
 738             $pages{$_} .= "$dir/$_.pod:";
 739             push(@pods, "$dir/$_.pod");
 740         } elsif (/\.pm$/) {                                 # .pm
 741             s/\.pm$//;
 742             $pages{$_}  = "" unless defined $pages{$_};
 743             $pages{$_} .= "$dir/$_.pm:";
 744             push(@pods, "$dir/$_.pm");
 745         }
 746     }
 747     closedir(DIR);
 748
 749     # recurse on the subdirectories if necessary
 750     if ($recurse) {
 751         foreach my $subdir (@subdirs) {
 752             scan_dir("$dir/$subdir", $recurse);
 753         }
 754     }
 755 }
 756
 757 #
 758 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
 759 #  build an index.
 760 #
 761 sub scan_headings {
 762     my($sections, @data) = @_;
 763     my($tag, $which_head, $title, $listdepth, $index);
 764
 765     # here we need      local $ignore = 0;
 766     #  unfortunately, we can't have it, because $ignore is lexical
 767     $ignore = 0;
 768
 769     $listdepth = 0;
 770     $index = "";
 771
 772     # scan for =head directives, note their name, and build an index
 773     #  pointing to each of them.
 774     foreach my $line (@data) {
 775         if ($line =~ /^=(head)([1-6])\s+(.*)/) {
 776             ($tag,$which_head, $title) = ($1,$2,$3);
 777             chomp($title);
 778             $$sections{htmlify(0,$title)} = 1;
 779
 780             while ($which_head != $listdepth) {
 781                 if ($which_head > $listdepth) {
 782                     $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
 783                     $listdepth++;
 784                 } elsif ($which_head < $listdepth) {
 785                     $listdepth--;
 786                     $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 787                 }
 788             }
 789
 790             $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
 791                       "<A HREF=\"#" . htmlify(0,$title) . "\">" .
 792                       html_escape(process_text(\$title, 0)) . "</A>";
 793         }
 794     }
 795
 796     # finish off the lists
 797     while ($listdepth--) {
 798         $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 799     }
 800
 801     # get rid of bogus lists
 802     $index =~ s,\t*<UL>\s*</UL>\n,,g;
 803
 804     $ignore = 1;        # restore old value;
 805
 806     return $index;
 807 }
 808
 809 #
 810 # scan_items - scans the pod specified by $pod for =item directives.  we
 811 #  will use this information later on in resolving C<> links.
 812 #
 813 sub scan_items {
 814     my($pod, @poddata) = @_;
 815     my($i, $item);
 816     local $_;
 817
 818     $pod =~ s/\.pod$//;
 819     $pod .= ".html" if $pod;
 820
 821     foreach $i (0..$#poddata) {
 822         $_ = $poddata[$i];
 823
 824         # remove any formatting instructions
 825         s,[A-Z]<([^<>]*)>,$1,g;
 826
 827         # figure out what kind of item it is and get the first word of
 828         #  it's name.
 829         if (/^=item\s+(\w*)\s*.*$/s) {
 830             if ($1 eq "*") {            # bullet list
 831                 /\A=item\s+\*\s*(.*?)\s*\Z/s;
 832                 $item = $1;
 833             } elsif ($1 =~ /^\d+/) {    # numbered list
 834                 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
 835                 $item = $1;
 836             } else {
 837 #               /\A=item\s+(.*?)\s*\Z/s;
 838                 /\A=item\s+(\w*)/s;
 839                 $item = $1;
 840             }
 841
 842             $items{$item} = "$pod" if $item;
 843         }
 844     }
 845 }
 846
 847 #
 848 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
 849 #
 850 sub process_head {
 851     my($tag, $heading) = @_;
 852     my $firstword;
 853
 854     # figure out the level of the =head
 855     $tag =~ /head([1-6])/;
 856     my $level = $1;
 857
 858     # can't have a heading full of spaces and speechmarks and so on
 859     $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
 860
 861     print HTML "<P>\n" unless $listlevel;
 862     print HTML "<HR>\n" unless $listlevel || $top;
 863     print HTML "<H$level>"; # unless $listlevel;
 864     #print HTML "<H$level>" unless $listlevel;
 865     my $convert = $heading; process_text(\$convert, 0);
 866     $convert = html_escape($convert);
 867     print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
 868     print HTML "</H$level>"; # unless $listlevel;
 869     print HTML "\n";
 870 }
 871
 872 #
 873 # process_item - convert a pod item tag and convert it to HTML format.
 874 #
 875 sub process_item {
 876     my $text = $_[0];
 877     my($i, $quote, $name);
 878
 879     my $need_preamble = 0;
 880     my $this_entry;
 881
 882
 883     # lots of documents start a list without doing an =over.  this is
 884     # bad!  but, the proper thing to do seems to be to just assume
 885     # they did do an =over.  so warn them once and then continue.
 886     warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n"
 887         unless $listlevel;
 888     process_over() unless $listlevel;
 889
 890     return unless $listlevel;
 891
 892     # remove formatting instructions from the text
 893     1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
 894     pre_escape(\$text);
 895
 896     $need_preamble = $items_seen[$listlevel]++ == 0;
 897
 898     # check if this is the first =item after an =over
 899     $i = $listlevel - 1;
 900     my $need_new = $listlevel >= @listitem;
 901
 902     if ($text =~ /\A\*/) {              # bullet
 903
 904         if ($need_preamble) {
 905             push(@listend,  "</UL>");
 906             print HTML "<UL>\n";
 907         }
 908
 909         print HTML '<LI>';
 910         if ($text =~ /\A\*\s*(.+)\Z/s) {
 911             print HTML '<STRONG>';
 912             if ($items_named{$1}++) {
 913                 print HTML html_escape($1);
 914             } else {
 915                 my $name = 'item_' . htmlify(1,$1);
 916                 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
 917             }
 918             print HTML '</STRONG>';
 919         }
 920
 921     } elsif ($text =~ /\A[\d#]+/) {     # numbered list
 922
 923         if ($need_preamble) {
 924             push(@listend,  "</OL>");
 925             print HTML "<OL>\n";
 926         }
 927
 928         print HTML '<LI>';
 929         if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
 930             print HTML '<STRONG>';
 931             if ($items_named{$1}++) {
 932                 print HTML html_escape($1);
 933             } else {
 934                 my $name = 'item_' . htmlify(0,$1);
 935                 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
 936             }
 937             print HTML '</STRONG>';
 938         }
 939
 940     } else {                    # all others
 941
 942         if ($need_preamble) {
 943             push(@listend,  '</DL>');
 944             print HTML "<DL>\n";
 945         }
 946
 947         print HTML '<DT>';
 948         if ($text =~ /(\S+)/) {
 949             print HTML '<STRONG>';
 950             if ($items_named{$1}++) {
 951                 print HTML html_escape($text);
 952             } else {
 953                 my $name = 'item_' . htmlify(1,$text);
 954                 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
 955             }
 956             print HTML '</STRONG>';
 957         }
 958        print HTML '<DD>';
 959     }
 960
 961     print HTML "\n";
 962 }
 963
 964 #
 965 # process_over - process a pod over tag and start a corresponding HTML
 966 # list.
 967 #
 968 sub process_over {
 969     # start a new list
 970     $listlevel++;
 971 }
 972
 973 #
 974 # process_back - process a pod back tag and convert it to HTML format.
 975 #
 976 sub process_back {
 977     warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n"
 978         unless $listlevel;
 979     return unless $listlevel;
 980
 981     # close off the list.  note, I check to see if $listend[$listlevel] is
 982     # defined because an =item directive may have never appeared and thus
 983     # $listend[$listlevel] may have never been initialized.
 984     $listlevel--;
 985     print HTML $listend[$listlevel] if defined $listend[$listlevel];
 986     print HTML "\n";
 987
 988     # don't need the corresponding perl code anymore
 989     pop(@listitem);
 990     pop(@listdata);
 991     pop(@listend);
 992
 993     pop(@items_seen);
 994 }
 995
 996 #
 997 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
 998 #
 999 sub process_cut {
1000     $ignore = 1;
1001 }
1002
1003 #
1004 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1005 # corresponding cut.
1006 #
1007 sub process_pod {
1008     # no need to set $ignore to 0 cause the main loop did it
1009 }
1010
1011 #
1012 # process_for - process a =for pod tag.  if it's for html, split
1013 # it out verbatim, otherwise ignore it.
1014 #
1015 sub process_for {
1016     my($whom, $text) = @_;
1017     if ( $whom =~ /^(pod2)?html$/i) {
1018         print HTML $text;
1019     }
1020 }
1021
1022 #
1023 # process_begin - process a =begin pod tag.  this pushes
1024 # whom we're beginning on the begin stack.  if there's a
1025 # begin stack, we only print if it us.
1026 #
1027 sub process_begin {
1028     my($whom, $text) = @_;
1029     $whom = lc($whom);
1030     push (@begin_stack, $whom);
1031     if ( $whom =~ /^(pod2)?html$/) {
1032         print HTML $text if $text;
1033     }
1034 }
1035
1036 #
1037 # process_end - process a =end pod tag.  pop the
1038 # begin stack.  die if we're mismatched.
1039 #
1040 sub process_end {
1041     my($whom, $text) = @_;
1042     $whom = lc($whom);
1043     if ($begin_stack[-1] ne $whom ) {
1044         die "Unmatched begin/end at chunk $paragraph\n"
1045     }
1046     pop @begin_stack;
1047 }
1048
1049 #
1050 # process_text - handles plaintext that appears in the input pod file.
1051 # there may be pod commands embedded within the text so those must be
1052 # converted to html commands.
1053 #
1054 sub process_text {
1055     my($text, $escapeQuotes) = @_;
1056     my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1057     my($podcommand, $params, $tag, $quote);
1058
1059     return if $ignore;
1060
1061     $quote  = 0;                # status of double-quote conversion
1062     $result = "";
1063     $rest = $$text;
1064
1065     if ($rest =~ /^\s+/) {      # preformatted text, no pod directives
1066         $rest =~ s/\n+\Z//;
1067         $rest =~ s#.*#
1068             my $line = $&;
1069             1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1070             $line;
1071         #eg;
1072
1073         $rest   =~ s/&/&amp;/g;
1074         $rest   =~ s/</&lt;/g;
1075         $rest   =~ s/>/&gt;/g;
1076         $rest   =~ s/"/&quot;/g;
1077
1078         # try and create links for all occurrences of perl.* within
1079         # the preformatted text.
1080         $rest =~ s{
1081                     (\s*)(perl\w+)
1082                   }{
1083                     if (defined $pages{$2}) {   # is a link
1084                         qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1085                     } elsif (defined $pages{dosify($2)}) {      # is a link
1086                         qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1087                     } else {
1088                         "$1$2";
1089                     }
1090                   }xeg;
1091         $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1092
1093   my $urls = '(' . join ('|', qw{
1094                 http
1095                 telnet
1096                 mailto
1097                 news
1098                 gopher
1099                 file
1100                 wais
1101                 ftp
1102             } )
1103         . ')';
1104
1105   my $ltrs = '\w';
1106   my $gunk = '/#~:.?+=&%@!\-';
1107   my $punc = '.:?\-';
1108   my $any  = "${ltrs}${gunk}${punc}";
1109
1110   $rest =~ s{
1111         \b                          # start at word boundary
1112         (                           # begin $1  {
1113           $urls     :               # need resource and a colon
1114           [$any] +?                 # followed by on or more
1115                                     #  of any valid character, but
1116                                     #  be conservative and take only
1117                                     #  what you need to....
1118         )                           # end   $1  }
1119         (?=                         # look-ahead non-consumptive assertion
1120                 [$punc]*            # either 0 or more puntuation
1121                 [^$any]             #   followed by a non-url char
1122             |                       # or else
1123                 $                   #   then end of the string
1124         )
1125       }{<A HREF="$1">$1</A>}igox;
1126
1127         $result =   "<PRE>"     # text should be as it is (verbatim)
1128                   . "$rest\n"
1129                   . "</PRE>\n";
1130     } else {                    # formatted text
1131         # parse through the string, stopping each time we find a
1132         # pod-escape.  once the string has been throughly processed
1133         # we can output it.
1134         while (length $rest) {
1135             # check to see if there are any possible pod directives in
1136             # the remaining part of the text.
1137             if ($rest =~ m/[BCEIFLSZ]</) {
1138                 warn "\$rest\t= $rest\n" unless
1139                     $rest =~ /\A
1140                            ([^<]*?)
1141                            ([BCEIFLSZ]?)
1142                            <
1143                            (.*)\Z/xs;
1144
1145                 $s1 = $1;       # pure text
1146                 $s2 = $2;       # the type of pod-escape that follows
1147                 $s3 = '<';      # '<'
1148                 $s4 = $3;       # the rest of the string
1149             } else {
1150                 $s1 = $rest;
1151                 $s2 = "";
1152                 $s3 = "";
1153                 $s4 = "";
1154             }
1155
1156             if ($s3 eq '<' && $s2) {    # a pod-escape
1157                 $result    .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1158                 $podcommand = "$s2<";
1159                 $rest       = $s4;
1160
1161                 # find the matching '>'
1162                 $match = 1;
1163                 $bf = 0;
1164                 while ($match && !$bf) {
1165                     $bf = 1;
1166                     if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1167                         $bf = 0;
1168                         $match++;
1169                         $podcommand .= $1;
1170                         $rest        = $2;
1171                     } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1172                         $bf = 0;
1173                         $match--;
1174                         $podcommand .= $1;
1175                         $rest        = $2;
1176                     }
1177                 }
1178
1179                 if ($match != 0) {
1180                     warn <<WARN;
1181 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1182 WARN
1183                     $result .= substr $podcommand, 0, 2;
1184                     $rest = substr($podcommand, 2) . $rest;
1185                     next;
1186                 }
1187
1188                 # pull out the parameters to the pod-escape
1189                 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1190                 $tag    = $1;
1191                 $params = $2;
1192
1193                 # process the text within the pod-escape so that any escapes
1194                 # which must occur do.
1195                 process_text(\$params, 0) unless $tag eq 'L';
1196
1197                 $s1 = $params;
1198                 if (!$tag || $tag eq " ") {     #  <> : no tag
1199                     $s1 = "&lt;$params&gt;";
1200                 } elsif ($tag eq "L") {         # L<> : link
1201                     $s1 = process_L($params);
1202                 } elsif ($tag eq "I" ||         # I<> : italicize text
1203                          $tag eq "B" ||         # B<> : bold text
1204                          $tag eq "F") {         # F<> : file specification
1205                     $s1 = process_BFI($tag, $params);
1206                 } elsif ($tag eq "C") {         # C<> : literal code
1207                     $s1 = process_C($params, 1);
1208                 } elsif ($tag eq "E") {         # E<> : escape
1209                     $s1 = process_E($params);
1210                 } elsif ($tag eq "Z") {         # Z<> : zero-width character
1211                     $s1 = process_Z($params);
1212                 } elsif ($tag eq "S") {         # S<> : non-breaking space
1213                     $s1 = process_S($params);
1214                 } elsif ($tag eq "X") {         # S<> : non-breaking space
1215                     $s1 = process_X($params);
1216                 } else {
1217                     warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1218                 }
1219
1220                 $result .= "$s1";
1221             } else {
1222                 # for pure text we must deal with implicit links and
1223                 # double-quotes among other things.
1224                 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1225                 $rest    = $s4;
1226             }
1227         }
1228     }
1229     $$text = $result;
1230 }
1231
1232 sub html_escape {
1233     my $rest = $_[0];
1234     $rest   =~ s/&/&amp;/g;
1235     $rest   =~ s/</&lt;/g;
1236     $rest   =~ s/>/&gt;/g;
1237     $rest   =~ s/"/&quot;/g;
1238     return $rest;
1239 }
1240
1241 #
1242 # process_puretext - process pure text (without pod-escapes) converting
1243 #  double-quotes and handling implicit C<> links.
1244 #
1245 sub process_puretext {
1246     my($text, $quote) = @_;
1247     my(@words, $result, $rest, $lead, $trail);
1248
1249     # convert double-quotes to single-quotes
1250     $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1251     while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1252
1253     $$quote = ($text =~ m/"/ ? 1 : 0);
1254     $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1255
1256     # keep track of leading and trailing white-space
1257     $lead  = ($text =~ /\A(\s*)/s ? $1 : "");
1258     $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1259
1260     # collapse all white space into a single space
1261     $text =~ s/\s+/ /g;
1262     @words = split(" ", $text);
1263
1264     # process each word individually
1265     foreach my $word (@words) {
1266         # see if we can infer a link
1267         if ($word =~ /^\w+\(/) {
1268             # has parenthesis so should have been a C<> ref
1269             $word = process_C($word);
1270 #           $word =~ /^[^()]*]\(/;
1271 #           if (defined $items{$1} && $items{$1}) {
1272 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1273 #                       . htmlify(0,$word)
1274 #                       . "\">$word</A></CODE>";
1275 #           } elsif (defined $items{$word} && $items{$word}) {
1276 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1277 #                       . htmlify(0,$word)
1278 #                       . "\">$word</A></CODE>";
1279 #           } else {
1280 #               $word =   "\n<CODE><A HREF=\"#item_"
1281 #                       . htmlify(0,$word)
1282 #                       . "\">$word</A></CODE>";
1283 #           }
1284         } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1285             # perl variables, should be a C<> ref
1286             $word = process_C($word, 1);
1287         } elsif ($word =~ m,^\w+://\w,) {
1288             # looks like a URL
1289             $word = qq(<A HREF="$word">$word</A>);
1290         } elsif ($word =~ /[\w.-]+\@\w+\.\w/) {
1291             # looks like an e-mail address
1292             my ($w1, $w2, $w3) = ("", $word, "");
1293             ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1294             ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1295             $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1296         } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) {  # all uppercase?
1297             $word = html_escape($word) if $word =~ /["&<>]/;
1298             $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1299         } else {
1300             $word = html_escape($word) if $word =~ /["&<>]/;
1301         }
1302     }
1303
1304     # build a new string based upon our conversion
1305     $result = "";
1306     $rest   = join(" ", @words);
1307     while (length($rest) > 75) {
1308         if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1309              $rest =~ m/^(\S*)\s(.*?)$/o) {
1310
1311             $result .= "$1\n";
1312             $rest    = $2;
1313         } else {
1314             $result .= "$rest\n";
1315             $rest    = "";
1316         }
1317     }
1318     $result .= $rest if $rest;
1319
1320     # restore the leading and trailing white-space
1321     $result = "$lead$result$trail";
1322
1323     return $result;
1324 }
1325
1326 #
1327 # pre_escape - convert & in text to $amp;
1328 #
1329 sub pre_escape {
1330     my($str) = @_;
1331
1332     $$str =~ s,&,&amp;,g;
1333 }
1334
1335 #
1336 # dosify - convert filenames to 8.3
1337 #
1338 sub dosify {
1339     my($str) = @_;
1340     if ($Is83) {
1341         $str = lc $str;
1342         $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1343         $str =~ s/(\w+)/substr ($1,0,8)/ge;
1344     }
1345     return $str;
1346 }
1347
1348 #
1349 # process_L - convert a pod L<> directive to a corresponding HTML link.
1350 #  most of the links made are inferred rather than known about directly
1351 #  (i.e it's not known whether the =head\d section exists in the target file,
1352 #   or whether a .pod file exists in the case of split files).  however, the
1353 #  guessing usually works.
1354 #
1355 # Unlike the other directives, this should be called with an unprocessed
1356 # string, else tags in the link won't be matched.
1357 #
1358 sub process_L {
1359     my($str) = @_;
1360     my($s1, $s2, $linktext, $page, $page83, $section, $link);   # work strings
1361
1362     $str =~ s/\n/ /g;                   # undo word-wrapped tags
1363     $s1 = $str;
1364     for ($s1) {
1365         # LREF: a la HREF L<show this text|man/section>
1366         $linktext = $1 if s:^([^|]+)\|::;
1367
1368         # a :: acts like a /
1369         s,::,/,;
1370
1371         # make sure sections start with a /
1372         s,^",/",g;
1373         s,^,/,g if (!m,/, && / /);
1374
1375         # check if there's a section specified
1376         if (m,^(.*?)/"?(.*?)"?$,) {     # yes
1377             ($page, $section) = ($1, $2);
1378         } else {                        # no
1379             ($page, $section) = ($str, "");
1380         }
1381
1382         # check if we know that this is a section in this page
1383         if (!defined $pages{$page} && defined $sections{$page}) {
1384             $section = $page;
1385             $page = "";
1386         }
1387     }
1388
1389     $page83=dosify($page);
1390     $page=$page83 if (defined $pages{$page83});
1391     if ($page eq "") {
1392         $link = "#" . htmlify(0,$section);
1393         $linktext = $section unless defined($linktext);
1394     } elsif (!defined $pages{$page}) {
1395         warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1396         $link = "";
1397         $linktext = $page unless defined($linktext);
1398     } else {
1399         $linktext  = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1400         $section = htmlify(0,$section) if $section ne "";
1401
1402         # if there is a directory by the name of the page, then assume that an
1403         # appropriate section will exist in the subdirectory
1404         if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1405             $link = "$htmlroot/$1/$section.html";
1406
1407         # since there is no directory by the name of the page, the section will
1408         # have to exist within a .html of the same name.  thus, make sure there
1409         # is a .pod or .pm that might become that .html
1410         } else {
1411             $section = "#$section";
1412             # check if there is a .pod with the page name
1413             if ($pages{$page} =~ /([^:]*)\.pod:/) {
1414                 $link = "$htmlroot/$1.html$section";
1415             } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1416                 $link = "$htmlroot/$1.html$section";
1417             } else {
1418                 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1419                              "no .pod or .pm found\n";
1420                 $link = "";
1421                 $linktext = $section unless defined($linktext);
1422             }
1423         }
1424     }
1425
1426     process_text(\$linktext, 0);
1427     if ($link) {
1428         $s1 = "<A HREF=\"$link\">$linktext</A>";
1429     } else {
1430         $s1 = "<EM>$linktext</EM>";
1431     }
1432     return $s1;
1433 }
1434
1435 #
1436 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1437 # convert them to corresponding HTML directives.
1438 #
1439 sub process_BFI {
1440     my($tag, $str) = @_;
1441     my($s1);                    # work string
1442     my(%repltext) = (   'B' => 'STRONG',
1443                         'F' => 'EM',
1444                         'I' => 'EM');
1445
1446     # extract the modified text and convert to HTML
1447     $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1448     return $s1;
1449 }
1450
1451 #
1452 # process_C - process the C<> pod-escape.
1453 #
1454 sub process_C {
1455     my($str, $doref) = @_;
1456     my($s1, $s2);
1457
1458     $s1 = $str;
1459     $s1 =~ s/\([^()]*\)//g;     # delete parentheses
1460     $s2 = $s1;
1461     $s1 =~ s/\W//g;             # delete bogus characters
1462     $str = html_escape($str);
1463
1464     # if there was a pod file that we found earlier with an appropriate
1465     # =item directive, then create a link to that page.
1466     if ($doref && defined $items{$s1}) {
1467         $s1 = ($items{$s1} ?
1468                "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) .  "\">$str</A>" :
1469                "<A HREF=\"#item_" . htmlify(0,$s2) .  "\">$str</A>");
1470         $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1471         confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1472     } else {
1473         $s1 = "<CODE>$str</CODE>";
1474         # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1475     }
1476
1477
1478     return $s1;
1479 }
1480
1481 #
1482 # process_E - process the E<> pod directive which seems to escape a character.
1483 #
1484 sub process_E {
1485     my($str) = @_;
1486
1487     for ($str) {
1488         s,([^/].*),\&$1\;,g;
1489     }
1490
1491     return $str;
1492 }
1493
1494 #
1495 # process_Z - process the Z<> pod directive which really just amounts to
1496 # ignoring it.  this allows someone to start a paragraph with an =
1497 #
1498 sub process_Z {
1499     my($str) = @_;
1500
1501     # there is no equivalent in HTML for this so just ignore it.
1502     $str = "";
1503     return $str;
1504 }
1505
1506 #
1507 # process_S - process the S<> pod directive which means to convert all
1508 # spaces in the string to non-breaking spaces (in HTML-eze).
1509 #
1510 sub process_S {
1511     my($str) = @_;
1512
1513     # convert all spaces in the text to non-breaking spaces in HTML.
1514     $str =~ s/ /&nbsp;/g;
1515     return $str;
1516 }
1517
1518 #
1519 # process_X - this is supposed to make an index entry.  we'll just
1520 # ignore it.
1521 #
1522 sub process_X {
1523     return '';
1524 }
1525
1526
1527 #
1528 # finish_list - finish off any pending HTML lists.  this should be called
1529 # after the entire pod file has been read and converted.
1530 #
1531 sub finish_list {
1532     while ($listlevel > 0) {
1533         print HTML "</DL>\n";
1534         $listlevel--;
1535     }
1536 }
1537
1538 #
1539 # htmlify - converts a pod section specification to a suitable section
1540 # specification for HTML.  if first arg is 1, only takes 1st word.
1541 #
1542 sub htmlify {
1543     my($compact, $heading) = @_;
1544
1545     if ($compact) {
1546       $heading =~ /^(\w+)/;
1547       $heading = $1;
1548     }
1549
1550   # $heading = lc($heading);
1551   $heading =~ s/[^\w\s]/_/g;
1552   $heading =~ s/(\s+)/ /g;
1553   $heading =~ s/^\s*(.*?)\s*$/$1/s;
1554   $heading =~ s/ /_/g;
1555   $heading =~ s/\A(.{32}).*\Z/$1/s;
1556   $heading =~ s/\s+\Z//;
1557   $heading =~ s/_{2,}/_/g;
1558
1559   return $heading;
1560 }
1561
1562 BEGIN {
1563 }
1564
1565 1;