lib/Pod/Html.pm

   1 package Pod::Html;
   2
   3 use Pod::Functions;
   4 use Getopt::Long;       # package for handling command-line parameters
   5 require Exporter;
   6 use vars qw($VERSION);
   7 $VERSION = 1.01;
   8 @ISA = Exporter;
   9 @EXPORT = qw(pod2html htmlify);
  10 use Cwd;
  11
  12 use Carp;
  13
  14 use locale;     # make \w work right in non-ASCII lands
  15
  16 use strict;
  17
  18 use Config;
  19
  20 =head1 NAME
  21
  22 Pod::Html - module to convert pod files to HTML
  23
  24 =head1 SYNOPSIS
  25
  26     use Pod::Html;
  27     pod2html([options]);
  28
  29 =head1 DESCRIPTION
  30
  31 Converts files from pod format (see L<perlpod>) to HTML format.  It
  32 can automatically generate indexes and cross-references, and it keeps
  33 a cache of things it knows how to cross-reference.
  34
  35 =head1 ARGUMENTS
  36
  37 Pod::Html takes the following arguments:
  38
  39 =over 4
  40
  41 =item help
  42
  43     --help
  44
  45 Displays the usage message.
  46
  47 =item htmlroot
  48
  49     --htmlroot=name
  50
  51 Sets the base URL for the HTML files.  When cross-references are made,
  52 the HTML root is prepended to the URL.
  53
  54 =item infile
  55
  56     --infile=name
  57
  58 Specify the pod file to convert.  Input is taken from STDIN if no
  59 infile is specified.
  60
  61 =item outfile
  62
  63     --outfile=name
  64
  65 Specify the HTML file to create.  Output goes to STDOUT if no outfile
  66 is specified.
  67
  68 =item podroot
  69
  70     --podroot=name
  71
  72 Specify the base directory for finding library pods.
  73
  74 =item podpath
  75
  76     --podpath=name:...:name
  77
  78 Specify which subdirectories of the podroot contain pod files whose
  79 HTML converted forms can be linked-to in cross-references.
  80
  81 =item libpods
  82
  83     --libpods=name:...:name
  84
  85 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
  86
  87 =item netscape
  88
  89     --netscape
  90
  91 Use Netscape HTML directives when applicable.
  92
  93 =item nonetscape
  94
  95     --nonetscape
  96
  97 Do not use Netscape HTML directives (default).
  98
  99 =item index
 100
 101     --index
 102
 103 Generate an index at the top of the HTML file (default behaviour).
 104
 105 =item noindex
 106
 107     --noindex
 108
 109 Do not generate an index at the top of the HTML file.
 110
 111
 112 =item recurse
 113
 114     --recurse
 115
 116 Recurse into subdirectories specified in podpath (default behaviour).
 117
 118 =item norecurse
 119
 120     --norecurse
 121
 122 Do not recurse into subdirectories specified in podpath.
 123
 124 =item title
 125
 126     --title=title
 127
 128 Specify the title of the resulting HTML file.
 129
 130 =item verbose
 131
 132     --verbose
 133
 134 Display progress messages.
 135
 136 =back
 137
 138 =head1 EXAMPLE
 139
 140     pod2html("pod2html",
 141              "--podpath=lib:ext:pod:vms",
 142              "--podroot=/usr/src/perl",
 143              "--htmlroot=/perl/nmanual",
 144              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
 145              "--recurse",
 146              "--infile=foo.pod",
 147              "--outfile=/perl/nmanual/foo.html");
 148
 149 =head1 AUTHOR
 150
 151 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 152
 153 =head1 BUGS
 154
 155 Has trouble with C<> etc in = commands.
 156
 157 =head1 SEE ALSO
 158
 159 L<perlpod>
 160
 161 =head1 COPYRIGHT
 162
 163 This program is distributed under the Artistic License.
 164
 165 =cut
 166
 167 my $dircache = "pod2html-dircache";
 168 my $itemcache = "pod2html-itemcache";
 169
 170 my @begin_stack = ();           # begin/end stack
 171
 172 my @libpods = ();               # files to search for links from C<> directives
 173 my $htmlroot = "/";             # http-server base directory from which all
 174                                 #   relative paths in $podpath stem.
 175 my $htmlfile = "";              # write to stdout by default
 176 my $podfile = "";               # read from stdin by default
 177 my @podpath = ();               # list of directories containing library pods.
 178 my $podroot = ".";              # filesystem base directory from which all
 179                                 #   relative paths in $podpath stem.
 180 my $recurse = 1;                # recurse on subdirectories in $podpath.
 181 my $verbose = 0;                # not verbose by default
 182 my $doindex = 1;                # non-zero if we should generate an index
 183 my $listlevel = 0;              # current list depth
 184 my @listitem = ();              # stack of HTML commands to use when a =item is
 185                                 #   encountered.  the top of the stack is the
 186                                 #   current list.
 187 my @listdata = ();              # similar to @listitem, but for the text after
 188                                 #   an =item
 189 my @listend = ();               # similar to @listitem, but the text to use to
 190                                 #   end the list.
 191 my $ignore = 1;                 # whether or not to format text.  we don't
 192                                 #   format text until we hit our first pod
 193                                 #   directive.
 194
 195 my %items_named = ();           # for the multiples of the same item in perlfunc
 196 my @items_seen = ();
 197 my $netscape = 0;               # whether or not to use netscape directives.
 198 my $title;                      # title to give the pod(s)
 199 my $top = 1;                    # true if we are at the top of the doc.  used
 200                                 #   to prevent the first <HR> directive.
 201 my $paragraph;                  # which paragraph we're processing (used
 202                                 #   for error messages)
 203 my %pages = ();                 # associative array used to find the location
 204                                 #   of pages referenced by L<> links.
 205 my %sections = ();              # sections within this page
 206 my %items = ();                 # associative array used to find the location
 207                                 #   of =item directives referenced by C<> links
 208 my $Is83;                       # is dos with short filenames (8.3)
 209
 210 sub init_globals {
 211 $dircache = "pod2html-dircache";
 212 $itemcache = "pod2html-itemcache";
 213
 214 @begin_stack = ();              # begin/end stack
 215
 216 @libpods = ();          # files to search for links from C<> directives
 217 $htmlroot = "/";                # http-server base directory from which all
 218                                 #   relative paths in $podpath stem.
 219 $htmlfile = "";         # write to stdout by default
 220 $podfile = "";          # read from stdin by default
 221 @podpath = ();          # list of directories containing library pods.
 222 $podroot = ".";         # filesystem base directory from which all
 223                                 #   relative paths in $podpath stem.
 224 $recurse = 1;           # recurse on subdirectories in $podpath.
 225 $verbose = 0;           # not verbose by default
 226 $doindex = 1;                   # non-zero if we should generate an index
 227 $listlevel = 0;         # current list depth
 228 @listitem = ();         # stack of HTML commands to use when a =item is
 229                                 #   encountered.  the top of the stack is the
 230                                 #   current list.
 231 @listdata = ();         # similar to @listitem, but for the text after
 232                                 #   an =item
 233 @listend = ();          # similar to @listitem, but the text to use to
 234                                 #   end the list.
 235 $ignore = 1;                    # whether or not to format text.  we don't
 236                                 #   format text until we hit our first pod
 237                                 #   directive.
 238
 239 @items_seen = ();
 240 %items_named = ();
 241 $netscape = 0;          # whether or not to use netscape directives.
 242 $title = '';                    # title to give the pod(s)
 243 $top = 1;                       # true if we are at the top of the doc.  used
 244                                 #   to prevent the first <HR> directive.
 245 $paragraph = '';                        # which paragraph we're processing (used
 246                                 #   for error messages)
 247 %sections = ();         # sections within this page
 248
 249 # These are not reinitialised here but are kept as a cache.
 250 # See get_cache and related cache management code.
 251 #%pages = ();                   # associative array used to find the location
 252                                 #   of pages referenced by L<> links.
 253 #%items = ();                   # associative array used to find the location
 254                                 #   of =item directives referenced by C<> links
 255 $Is83=$^O eq 'dos';
 256 }
 257
 258 sub pod2html {
 259     local(@ARGV) = @_;
 260     local($/);
 261     local $_;
 262
 263     init_globals();
 264
 265     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 266
 267     # cache of %pages and %items from last time we ran pod2html
 268
 269     #undef $opt_help if defined $opt_help;
 270
 271     # parse the command-line parameters
 272     parse_command_line();
 273
 274     # set some variables to their default values if necessary
 275     local *POD;
 276     unless (@ARGV && $ARGV[0]) {
 277         $podfile  = "-" unless $podfile;        # stdin
 278         open(POD, "<$podfile")
 279                 || die "$0: cannot open $podfile file for input: $!\n";
 280     } else {
 281         $podfile = $ARGV[0];  # XXX: might be more filenames
 282         *POD = *ARGV;
 283     }
 284     $htmlfile = "-" unless $htmlfile;   # stdout
 285     $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
 286
 287     # read the pod a paragraph at a time
 288     warn "Scanning for sections in input file(s)\n" if $verbose;
 289     $/ = "";
 290     my @poddata  = <POD>;
 291     close(POD);
 292
 293     # scan the pod for =head[1-6] directives and build an index
 294     my $index = scan_headings(\%sections, @poddata);
 295
 296     unless($index) {
 297         warn "No pod in $podfile\n" if $verbose;
 298         return;
 299     }
 300
 301     # open the output file
 302     open(HTML, ">$htmlfile")
 303             || die "$0: cannot open $htmlfile file for output: $!\n";
 304
 305     # put a title in the HTML file if one wasn't specified
 306     if ($title eq '') {
 307         TITLE_SEARCH: {
 308             for (my $i = 0; $i < @poddata; $i++) {
 309                 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 310                     for my $para ( @poddata[$i, $i+1] ) {
 311                         last TITLE_SEARCH
 312                             if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
 313                     }
 314                 }
 315
 316             }
 317         }
 318     }
 319     if (!$title and $podfile =~ /\.pod$/) {
 320         # probably a split pod so take first =head[12] as title
 321         for (my $i = 0; $i < @poddata; $i++) {
 322             last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 323         }
 324         warn "adopted '$title' as title for $podfile\n"
 325             if $verbose and $title;
 326     }
 327     if ($title) {
 328         $title =~ s/\s*\(.*\)//;
 329     } else {
 330         warn "$0: no title for $podfile";
 331         $podfile =~ /^(.*)(\.[^.\/]+)?$/;
 332         $title = ($podfile eq "-" ? 'No Title' : $1);
 333         warn "using $title" if $verbose;
 334     }
 335     print HTML <<END_OF_HEAD;
 336 <HTML>
 337 <HEAD>
 338 <TITLE>$title</TITLE>
 339 <LINK REV="made" HREF="mailto:$Config{perladmin}">
 340 </HEAD>
 341
 342 <BODY>
 343
 344 END_OF_HEAD
 345
 346     # load/reload/validate/cache %pages and %items
 347     get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
 348
 349     # scan the pod for =item directives
 350     scan_items("", \%items, @poddata);
 351
 352     # put an index at the top of the file.  note, if $doindex is 0 we
 353     # still generate an index, but surround it with an html comment.
 354     # that way some other program can extract it if desired.
 355     $index =~ s/--+/-/g;
 356     print HTML "<!-- INDEX BEGIN -->\n";
 357     print HTML "<!--\n" unless $doindex;
 358     print HTML $index;
 359     print HTML "-->\n" unless $doindex;
 360     print HTML "<!-- INDEX END -->\n\n";
 361     print HTML "<HR>\n" if $doindex;
 362
 363     # now convert this file
 364     warn "Converting input file\n" if $verbose;
 365     foreach my $i (0..$#poddata) {
 366         $_ = $poddata[$i];
 367         $paragraph = $i+1;
 368         if (/^(=.*)/s) {        # is it a pod directive?
 369             $ignore = 0;
 370             $_ = $1;
 371             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 372                 process_begin($1, $2);
 373             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 374                 process_end($1, $2);
 375             } elsif (/^=cut/) {                 # =cut
 376                 process_cut();
 377             } elsif (/^=pod/) {                 # =pod
 378                 process_pod();
 379             } else {
 380                 next if @begin_stack && $begin_stack[-1] ne 'html';
 381
 382                 if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
 383                     process_head($1, $2);
 384                 } elsif (/^=item\s*(.*\S)/sm) { # =item text
 385                     process_item($1);
 386                 } elsif (/^=over\s*(.*)/) {             # =over N
 387                     process_over();
 388                 } elsif (/^=back/) {            # =back
 389                     process_back();
 390                 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
 391                     process_for($1,$2);
 392                 } else {
 393                     /^=(\S*)\s*/;
 394                     warn "$0: $podfile: unknown pod directive '$1' in "
 395                        . "paragraph $paragraph.  ignoring.\n";
 396                 }
 397             }
 398             $top = 0;
 399         }
 400         else {
 401             next if $ignore;
 402             next if @begin_stack && $begin_stack[-1] ne 'html';
 403             my $text = $_;
 404             process_text(\$text, 1);
 405             print HTML "<P>\n$text";
 406         }
 407     }
 408
 409     # finish off any pending directives
 410     finish_list();
 411     print HTML <<END_OF_TAIL;
 412 </BODY>
 413
 414 </HTML>
 415 END_OF_TAIL
 416
 417     # close the html file
 418     close(HTML);
 419
 420     warn "Finished\n" if $verbose;
 421 }
 422
 423 ##############################################################################
 424
 425 my $usage;                      # see below
 426 sub usage {
 427     my $podfile = shift;
 428     warn "$0: $podfile: @_\n" if @_;
 429     die $usage;
 430 }
 431
 432 $usage =<<END_OF_USAGE;
 433 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 434            --podpath=<name>:...:<name> --podroot=<name>
 435            --libpods=<name>:...:<name> --recurse --verbose --index
 436            --netscape --norecurse --noindex
 437
 438   --flush      - flushes the item and directory caches.
 439   --help       - prints this message.
 440   --htmlroot   - http-server base directory from which all relative paths
 441                  in podpath stem (default is /).
 442   --index      - generate an index at the top of the resulting html
 443                  (default).
 444   --infile     - filename for the pod to convert (input taken from stdin
 445                  by default).
 446   --libpods    - colon-separated list of pages to search for =item pod
 447                  directives in as targets of C<> and implicit links (empty
 448                  by default).  note, these are not filenames, but rather
 449                  page names like those that appear in L<> links.
 450   --netscape   - will use netscape html directives when applicable.
 451   --nonetscape - will not use netscape directives (default).
 452   --outfile    - filename for the resulting html file (output sent to
 453                  stdout by default).
 454   --podpath    - colon-separated list of directories containing library
 455                  pods.  empty by default.
 456   --podroot    - filesystem base directory from which all relative paths
 457                  in podpath stem (default is .).
 458   --noindex    - don't generate an index at the top of the resulting html.
 459   --norecurse  - don't recurse on those subdirectories listed in podpath.
 460   --recurse    - recurse on those subdirectories listed in podpath
 461                  (default behavior).
 462   --title      - title that will appear in resulting html file.
 463   --verbose    - self-explanatory
 464
 465 END_OF_USAGE
 466
 467 sub parse_command_line {
 468     my ($opt_flush,$opt_help,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
 469     my $result = GetOptions(
 470                             'flush'      => \$opt_flush,
 471                             'help'       => \$opt_help,
 472                             'htmlroot=s' => \$opt_htmlroot,
 473                             'index!'     => \$opt_index,
 474                             'infile=s'   => \$opt_infile,
 475                             'libpods=s'  => \$opt_libpods,
 476                             'netscape!'  => \$opt_netscape,
 477                             'outfile=s'  => \$opt_outfile,
 478                             'podpath=s'  => \$opt_podpath,
 479                             'podroot=s'  => \$opt_podroot,
 480                             'norecurse'  => \$opt_norecurse,
 481                             'recurse!'   => \$opt_recurse,
 482                             'title=s'    => \$opt_title,
 483                             'verbose'    => \$opt_verbose,
 484                            );
 485     usage("-", "invalid parameters") if not $result;
 486
 487     usage("-") if defined $opt_help;    # see if the user asked for help
 488     $opt_help = "";                     # just to make -w shut-up.
 489
 490     $podfile  = $opt_infile if defined $opt_infile;
 491     $htmlfile = $opt_outfile if defined $opt_outfile;
 492
 493     @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 494     @libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 495
 496     warn "Flushing item and directory caches\n"
 497         if $opt_verbose && defined $opt_flush;
 498     unlink($dircache, $itemcache) if defined $opt_flush;
 499
 500     $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 501     $podroot  = $opt_podroot if defined $opt_podroot;
 502
 503     $doindex  = $opt_index if defined $opt_index;
 504     $recurse  = $opt_recurse if defined $opt_recurse;
 505     $title    = $opt_title if defined $opt_title;
 506     $verbose  = defined $opt_verbose ? 1 : 0;
 507     $netscape = $opt_netscape if defined $opt_netscape;
 508 }
 509
 510
 511 my $saved_cache_key;
 512
 513 sub get_cache {
 514     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 515     my @cache_key_args = @_;
 516
 517     # A first-level cache:
 518     # Don't bother reading the cache files if they still apply
 519     # and haven't changed since we last read them.
 520
 521     my $this_cache_key = cache_key(@cache_key_args);
 522
 523     return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
 524
 525     # load the cache of %pages and %items if possible.  $tests will be
 526     # non-zero if successful.
 527     my $tests = 0;
 528     if (-f $dircache && -f $itemcache) {
 529         warn "scanning for item cache\n" if $verbose;
 530         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 531     }
 532
 533     # if we didn't succeed in loading the cache then we must (re)build
 534     #  %pages and %items.
 535     if (!$tests) {
 536         warn "scanning directories in pod-path\n" if $verbose;
 537         scan_podpath($podroot, $recurse, 0);
 538     }
 539     $saved_cache_key = cache_key(@cache_key_args);
 540 }
 541
 542 sub cache_key {
 543     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 544     return join('!', $dircache, $itemcache, $recurse,
 545                 @$podpath, $podroot, stat($dircache), stat($itemcache));
 546 }
 547
 548 #
 549 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 550 #  are valid caches of %pages and %items.  if they are valid then it loads
 551 #  them and returns a non-zero value.
 552 #
 553
 554 sub load_cache {
 555     my($dircache, $itemcache, $podpath, $podroot) = @_;
 556     my($tests);
 557     local $_;
 558
 559     $tests = 0;
 560
 561     open(CACHE, "<$itemcache") ||
 562         die "$0: error opening $itemcache for reading: $!\n";
 563     $/ = "\n";
 564
 565     # is it the same podpath?
 566     $_ = <CACHE>;
 567     chomp($_);
 568     $tests++ if (join(":", @$podpath) eq $_);
 569
 570     # is it the same podroot?
 571     $_ = <CACHE>;
 572     chomp($_);
 573     $tests++ if ($podroot eq $_);
 574
 575     # load the cache if its good
 576     if ($tests != 2) {
 577         close(CACHE);
 578         return 0;
 579     }
 580
 581     warn "loading item cache\n" if $verbose;
 582     while (<CACHE>) {
 583         /(.*?) (.*)$/;
 584         $items{$1} = $2;
 585     }
 586     close(CACHE);
 587
 588     warn "scanning for directory cache\n" if $verbose;
 589     open(CACHE, "<$dircache") ||
 590         die "$0: error opening $dircache for reading: $!\n";
 591     $/ = "\n";
 592     $tests = 0;
 593
 594     # is it the same podpath?
 595     $_ = <CACHE>;
 596     chomp($_);
 597     $tests++ if (join(":", @$podpath) eq $_);
 598
 599     # is it the same podroot?
 600     $_ = <CACHE>;
 601     chomp($_);
 602     $tests++ if ($podroot eq $_);
 603
 604     # load the cache if its good
 605     if ($tests != 2) {
 606         close(CACHE);
 607         return 0;
 608     }
 609
 610     warn "loading directory cache\n" if $verbose;
 611     while (<CACHE>) {
 612         /(.*?) (.*)$/;
 613         $pages{$1} = $2;
 614     }
 615
 616     close(CACHE);
 617
 618     return 1;
 619 }
 620
 621 #
 622 # scan_podpath - scans the directories specified in @podpath for directories,
 623 #  .pod files, and .pm files.  it also scans the pod files specified in
 624 #  @libpods for =item directives.
 625 #
 626 sub scan_podpath {
 627     my($podroot, $recurse, $append) = @_;
 628     my($pwd, $dir);
 629     my($libpod, $dirname, $pod, @files, @poddata);
 630
 631     unless($append) {
 632         %items = ();
 633         %pages = ();
 634     }
 635
 636     # scan each directory listed in @podpath
 637     $pwd = getcwd();
 638     chdir($podroot)
 639         || die "$0: error changing to directory $podroot: $!\n";
 640     foreach $dir (@podpath) {
 641         scan_dir($dir, $recurse);
 642     }
 643
 644     # scan the pods listed in @libpods for =item directives
 645     foreach $libpod (@libpods) {
 646         # if the page isn't defined then we won't know where to find it
 647         # on the system.
 648         next unless defined $pages{$libpod} && $pages{$libpod};
 649
 650         # if there is a directory then use the .pod and .pm files within it.
 651         if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 652             #  find all the .pod and .pm files within the directory
 653             $dirname = $1;
 654             opendir(DIR, $dirname) ||
 655                 die "$0: error opening directory $dirname: $!\n";
 656             @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
 657             closedir(DIR);
 658
 659             # scan each .pod and .pm file for =item directives
 660             foreach $pod (@files) {
 661                 open(POD, "<$dirname/$pod") ||
 662                     die "$0: error opening $dirname/$pod for input: $!\n";
 663                 @poddata = <POD>;
 664                 close(POD);
 665
 666                 scan_items("$dirname/$pod", @poddata);
 667             }
 668
 669             # use the names of files as =item directives too.
 670             foreach $pod (@files) {
 671                 $pod =~ /^(.*)(\.pod|\.pm)$/;
 672                 $items{$1} = "$dirname/$1.html" if $1;
 673             }
 674         } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
 675                  $pages{$libpod} =~ /([^:]*\.pm):/) {
 676             # scan the .pod or .pm file for =item directives
 677             $pod = $1;
 678             open(POD, "<$pod") ||
 679                 die "$0: error opening $pod for input: $!\n";
 680             @poddata = <POD>;
 681             close(POD);
 682
 683             scan_items("$pod", @poddata);
 684         } else {
 685             warn "$0: shouldn't be here (line ".__LINE__."\n";
 686         }
 687     }
 688     @poddata = ();      # clean-up a bit
 689
 690     chdir($pwd)
 691         || die "$0: error changing to directory $pwd: $!\n";
 692
 693     # cache the item list for later use
 694     warn "caching items for later use\n" if $verbose;
 695     open(CACHE, ">$itemcache") ||
 696         die "$0: error open $itemcache for writing: $!\n";
 697
 698     print CACHE join(":", @podpath) . "\n$podroot\n";
 699     foreach my $key (keys %items) {
 700         print CACHE "$key $items{$key}\n";
 701     }
 702
 703     close(CACHE);
 704
 705     # cache the directory list for later use
 706     warn "caching directories for later use\n" if $verbose;
 707     open(CACHE, ">$dircache") ||
 708         die "$0: error open $dircache for writing: $!\n";
 709
 710     print CACHE join(":", @podpath) . "\n$podroot\n";
 711     foreach my $key (keys %pages) {
 712         print CACHE "$key $pages{$key}\n";
 713     }
 714
 715     close(CACHE);
 716 }
 717
 718 #
 719 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 720 #  files, and .pm files.  notes those that it finds.  this information will
 721 #  be used later in order to figure out where the pages specified in L<>
 722 #  links are on the filesystem.
 723 #
 724 sub scan_dir {
 725     my($dir, $recurse) = @_;
 726     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 727     local $_;
 728
 729     @subdirs = ();
 730     @pods = ();
 731
 732     opendir(DIR, $dir) ||
 733         die "$0: error opening directory $dir: $!\n";
 734     while (defined($_ = readdir(DIR))) {
 735         if (-d "$dir/$_" && $_ ne "." && $_ ne "..") {      # directory
 736             $pages{$_}  = "" unless defined $pages{$_};
 737             $pages{$_} .= "$dir/$_:";
 738             push(@subdirs, $_);
 739         } elsif (/\.pod$/) {                                # .pod
 740             s/\.pod$//;
 741             $pages{$_}  = "" unless defined $pages{$_};
 742             $pages{$_} .= "$dir/$_.pod:";
 743             push(@pods, "$dir/$_.pod");
 744         } elsif (/\.pm$/) {                                 # .pm
 745             s/\.pm$//;
 746             $pages{$_}  = "" unless defined $pages{$_};
 747             $pages{$_} .= "$dir/$_.pm:";
 748             push(@pods, "$dir/$_.pm");
 749         }
 750     }
 751     closedir(DIR);
 752
 753     # recurse on the subdirectories if necessary
 754     if ($recurse) {
 755         foreach my $subdir (@subdirs) {
 756             scan_dir("$dir/$subdir", $recurse);
 757         }
 758     }
 759 }
 760
 761 #
 762 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
 763 #  build an index.
 764 #
 765 sub scan_headings {
 766     my($sections, @data) = @_;
 767     my($tag, $which_head, $title, $listdepth, $index);
 768
 769     # here we need      local $ignore = 0;
 770     #  unfortunately, we can't have it, because $ignore is lexical
 771     $ignore = 0;
 772
 773     $listdepth = 0;
 774     $index = "";
 775
 776     # scan for =head directives, note their name, and build an index
 777     #  pointing to each of them.
 778     foreach my $line (@data) {
 779         if ($line =~ /^=(head)([1-6])\s+(.*)/) {
 780             ($tag,$which_head, $title) = ($1,$2,$3);
 781             chomp($title);
 782             $$sections{htmlify(0,$title)} = 1;
 783
 784             while ($which_head != $listdepth) {
 785                 if ($which_head > $listdepth) {
 786                     $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
 787                     $listdepth++;
 788                 } elsif ($which_head < $listdepth) {
 789                     $listdepth--;
 790                     $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 791                 }
 792             }
 793
 794             $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
 795                       "<A HREF=\"#" . htmlify(0,$title) . "\">" .
 796                       html_escape(process_text(\$title, 0)) . "</A>";
 797         }
 798     }
 799
 800     # finish off the lists
 801     while ($listdepth--) {
 802         $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 803     }
 804
 805     # get rid of bogus lists
 806     $index =~ s,\t*<UL>\s*</UL>\n,,g;
 807
 808     $ignore = 1;        # restore old value;
 809
 810     return $index;
 811 }
 812
 813 #
 814 # scan_items - scans the pod specified by $pod for =item directives.  we
 815 #  will use this information later on in resolving C<> links.
 816 #
 817 sub scan_items {
 818     my($pod, @poddata) = @_;
 819     my($i, $item);
 820     local $_;
 821
 822     $pod =~ s/\.pod$//;
 823     $pod .= ".html" if $pod;
 824
 825     foreach $i (0..$#poddata) {
 826         $_ = $poddata[$i];
 827
 828         # remove any formatting instructions
 829         s,[A-Z]<([^<>]*)>,$1,g;
 830
 831         # figure out what kind of item it is and get the first word of
 832         #  it's name.
 833         if (/^=item\s+(\w*)\s*.*$/s) {
 834             if ($1 eq "*") {            # bullet list
 835                 /\A=item\s+\*\s*(.*?)\s*\Z/s;
 836                 $item = $1;
 837             } elsif ($1 =~ /^\d+/) {    # numbered list
 838                 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
 839                 $item = $1;
 840             } else {
 841 #               /\A=item\s+(.*?)\s*\Z/s;
 842                 /\A=item\s+(\w*)/s;
 843                 $item = $1;
 844             }
 845
 846             $items{$item} = "$pod" if $item;
 847         }
 848     }
 849 }
 850
 851 #
 852 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
 853 #
 854 sub process_head {
 855     my($tag, $heading) = @_;
 856     my $firstword;
 857
 858     # figure out the level of the =head
 859     $tag =~ /head([1-6])/;
 860     my $level = $1;
 861
 862     # can't have a heading full of spaces and speechmarks and so on
 863     $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
 864
 865     print HTML "<P>\n" unless $listlevel;
 866     print HTML "<HR>\n" unless $listlevel || $top;
 867     print HTML "<H$level>"; # unless $listlevel;
 868     #print HTML "<H$level>" unless $listlevel;
 869     my $convert = $heading; process_text(\$convert, 0);
 870     $convert = html_escape($convert);
 871     print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
 872     print HTML "</H$level>"; # unless $listlevel;
 873     print HTML "\n";
 874 }
 875
 876 #
 877 # process_item - convert a pod item tag and convert it to HTML format.
 878 #
 879 sub process_item {
 880     my $text = $_[0];
 881     my($i, $quote, $name);
 882
 883     my $need_preamble = 0;
 884     my $this_entry;
 885
 886
 887     # lots of documents start a list without doing an =over.  this is
 888     # bad!  but, the proper thing to do seems to be to just assume
 889     # they did do an =over.  so warn them once and then continue.
 890     warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n"
 891         unless $listlevel;
 892     process_over() unless $listlevel;
 893
 894     return unless $listlevel;
 895
 896     # remove formatting instructions from the text
 897     1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
 898     pre_escape(\$text);
 899
 900     $need_preamble = $items_seen[$listlevel]++ == 0;
 901
 902     # check if this is the first =item after an =over
 903     $i = $listlevel - 1;
 904     my $need_new = $listlevel >= @listitem;
 905
 906     if ($text =~ /\A\*/) {              # bullet
 907
 908         if ($need_preamble) {
 909             push(@listend,  "</UL>");
 910             print HTML "<UL>\n";
 911         }
 912
 913         print HTML '<LI>';
 914         if ($text =~ /\A\*\s*(.+)\Z/s) {
 915             print HTML '<STRONG>';
 916             if ($items_named{$1}++) {
 917                 print HTML html_escape($1);
 918             } else {
 919                 my $name = 'item_' . htmlify(1,$1);
 920                 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
 921             }
 922             print HTML '</STRONG>';
 923         }
 924
 925     } elsif ($text =~ /\A[\d#]+/) {     # numbered list
 926
 927         if ($need_preamble) {
 928             push(@listend,  "</OL>");
 929             print HTML "<OL>\n";
 930         }
 931
 932         print HTML '<LI>';
 933         if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
 934             print HTML '<STRONG>';
 935             if ($items_named{$1}++) {
 936                 print HTML html_escape($1);
 937             } else {
 938                 my $name = 'item_' . htmlify(0,$1);
 939                 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
 940             }
 941             print HTML '</STRONG>';
 942         }
 943
 944     } else {                    # all others
 945
 946         if ($need_preamble) {
 947             push(@listend,  '</DL>');
 948             print HTML "<DL>\n";
 949         }
 950
 951         print HTML '<DT>';
 952         if ($text =~ /(\S+)/) {
 953             print HTML '<STRONG>';
 954             if ($items_named{$1}++) {
 955                 print HTML html_escape($text);
 956             } else {
 957                 my $name = 'item_' . htmlify(1,$text);
 958                 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
 959             }
 960             print HTML '</STRONG>';
 961         }
 962        print HTML '<DD>';
 963     }
 964
 965     print HTML "\n";
 966 }
 967
 968 #
 969 # process_over - process a pod over tag and start a corresponding HTML
 970 # list.
 971 #
 972 sub process_over {
 973     # start a new list
 974     $listlevel++;
 975 }
 976
 977 #
 978 # process_back - process a pod back tag and convert it to HTML format.
 979 #
 980 sub process_back {
 981     warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n"
 982         unless $listlevel;
 983     return unless $listlevel;
 984
 985     # close off the list.  note, I check to see if $listend[$listlevel] is
 986     # defined because an =item directive may have never appeared and thus
 987     # $listend[$listlevel] may have never been initialized.
 988     $listlevel--;
 989     print HTML $listend[$listlevel] if defined $listend[$listlevel];
 990     print HTML "\n";
 991
 992     # don't need the corresponding perl code anymore
 993     pop(@listitem);
 994     pop(@listdata);
 995     pop(@listend);
 996
 997     pop(@items_seen);
 998 }
 999
1000 #
1001 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
1002 #
1003 sub process_cut {
1004     $ignore = 1;
1005 }
1006
1007 #
1008 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1009 # corresponding cut.
1010 #
1011 sub process_pod {
1012     # no need to set $ignore to 0 cause the main loop did it
1013 }
1014
1015 #
1016 # process_for - process a =for pod tag.  if it's for html, split
1017 # it out verbatim, if illustration, center it, otherwise ignore it.
1018 #
1019 sub process_for {
1020     my($whom, $text) = @_;
1021     if ( $whom =~ /^(pod2)?html$/i) {
1022         print HTML $text;
1023     } elsif ($whom =~ /^illustration$/i) {
1024         1 while chomp $text;
1025         for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1026           $text .= $ext, last if -r "$text$ext";
1027         }
1028         print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1029     }
1030 }
1031
1032 #
1033 # process_begin - process a =begin pod tag.  this pushes
1034 # whom we're beginning on the begin stack.  if there's a
1035 # begin stack, we only print if it us.
1036 #
1037 sub process_begin {
1038     my($whom, $text) = @_;
1039     $whom = lc($whom);
1040     push (@begin_stack, $whom);
1041     if ( $whom =~ /^(pod2)?html$/) {
1042         print HTML $text if $text;
1043     }
1044 }
1045
1046 #
1047 # process_end - process a =end pod tag.  pop the
1048 # begin stack.  die if we're mismatched.
1049 #
1050 sub process_end {
1051     my($whom, $text) = @_;
1052     $whom = lc($whom);
1053     if ($begin_stack[-1] ne $whom ) {
1054         die "Unmatched begin/end at chunk $paragraph\n"
1055     }
1056     pop @begin_stack;
1057 }
1058
1059 #
1060 # process_text - handles plaintext that appears in the input pod file.
1061 # there may be pod commands embedded within the text so those must be
1062 # converted to html commands.
1063 #
1064 sub process_text {
1065     my($text, $escapeQuotes) = @_;
1066     my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1067     my($podcommand, $params, $tag, $quote);
1068
1069     return if $ignore;
1070
1071     $quote  = 0;                # status of double-quote conversion
1072     $result = "";
1073     $rest = $$text;
1074
1075     if ($rest =~ /^\s+/) {      # preformatted text, no pod directives
1076         $rest =~ s/\n+\Z//;
1077         $rest =~ s#.*#
1078             my $line = $&;
1079             1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1080             $line;
1081         #eg;
1082
1083         $rest   =~ s/&/&amp;/g;
1084         $rest   =~ s/</&lt;/g;
1085         $rest   =~ s/>/&gt;/g;
1086         $rest   =~ s/"/&quot;/g;
1087
1088         # try and create links for all occurrences of perl.* within
1089         # the preformatted text.
1090         $rest =~ s{
1091                     (\s*)(perl\w+)
1092                   }{
1093                     if (defined $pages{$2}) {   # is a link
1094                         qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1095                     } elsif (defined $pages{dosify($2)}) {      # is a link
1096                         qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1097                     } else {
1098                         "$1$2";
1099                     }
1100                   }xeg;
1101         $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1102
1103   my $urls = '(' . join ('|', qw{
1104                 http
1105                 telnet
1106                 mailto
1107                 news
1108                 gopher
1109                 file
1110                 wais
1111                 ftp
1112             } )
1113         . ')';
1114
1115   my $ltrs = '\w';
1116   my $gunk = '/#~:.?+=&%@!\-';
1117   my $punc = '.:?\-';
1118   my $any  = "${ltrs}${gunk}${punc}";
1119
1120   $rest =~ s{
1121         \b                          # start at word boundary
1122         (                           # begin $1  {
1123           $urls     :               # need resource and a colon
1124           [$any] +?                 # followed by on or more
1125                                     #  of any valid character, but
1126                                     #  be conservative and take only
1127                                     #  what you need to....
1128         )                           # end   $1  }
1129         (?=                         # look-ahead non-consumptive assertion
1130                 [$punc]*            # either 0 or more puntuation
1131                 [^$any]             #   followed by a non-url char
1132             |                       # or else
1133                 $                   #   then end of the string
1134         )
1135       }{<A HREF="$1">$1</A>}igox;
1136
1137         $result =   "<PRE>"     # text should be as it is (verbatim)
1138                   . "$rest\n"
1139                   . "</PRE>\n";
1140     } else {                    # formatted text
1141         # parse through the string, stopping each time we find a
1142         # pod-escape.  once the string has been throughly processed
1143         # we can output it.
1144         while (length $rest) {
1145             # check to see if there are any possible pod directives in
1146             # the remaining part of the text.
1147             if ($rest =~ m/[BCEIFLSZ]</) {
1148                 warn "\$rest\t= $rest\n" unless
1149                     $rest =~ /\A
1150                            ([^<]*?)
1151                            ([BCEIFLSZ]?)
1152                            <
1153                            (.*)\Z/xs;
1154
1155                 $s1 = $1;       # pure text
1156                 $s2 = $2;       # the type of pod-escape that follows
1157                 $s3 = '<';      # '<'
1158                 $s4 = $3;       # the rest of the string
1159             } else {
1160                 $s1 = $rest;
1161                 $s2 = "";
1162                 $s3 = "";
1163                 $s4 = "";
1164             }
1165
1166             if ($s3 eq '<' && $s2) {    # a pod-escape
1167                 $result    .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1168                 $podcommand = "$s2<";
1169                 $rest       = $s4;
1170
1171                 # find the matching '>'
1172                 $match = 1;
1173                 $bf = 0;
1174                 while ($match && !$bf) {
1175                     $bf = 1;
1176                     if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1177                         $bf = 0;
1178                         $match++;
1179                         $podcommand .= $1;
1180                         $rest        = $2;
1181                     } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1182                         $bf = 0;
1183                         $match--;
1184                         $podcommand .= $1;
1185                         $rest        = $2;
1186                     }
1187                 }
1188
1189                 if ($match != 0) {
1190                     warn <<WARN;
1191 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1192 WARN
1193                     $result .= substr $podcommand, 0, 2;
1194                     $rest = substr($podcommand, 2) . $rest;
1195                     next;
1196                 }
1197
1198                 # pull out the parameters to the pod-escape
1199                 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1200                 $tag    = $1;
1201                 $params = $2;
1202
1203                 # process the text within the pod-escape so that any escapes
1204                 # which must occur do.
1205                 process_text(\$params, 0) unless $tag eq 'L';
1206
1207                 $s1 = $params;
1208                 if (!$tag || $tag eq " ") {     #  <> : no tag
1209                     $s1 = "&lt;$params&gt;";
1210                 } elsif ($tag eq "L") {         # L<> : link
1211                     $s1 = process_L($params);
1212                 } elsif ($tag eq "I" ||         # I<> : italicize text
1213                          $tag eq "B" ||         # B<> : bold text
1214                          $tag eq "F") {         # F<> : file specification
1215                     $s1 = process_BFI($tag, $params);
1216                 } elsif ($tag eq "C") {         # C<> : literal code
1217                     $s1 = process_C($params, 1);
1218                 } elsif ($tag eq "E") {         # E<> : escape
1219                     $s1 = process_E($params);
1220                 } elsif ($tag eq "Z") {         # Z<> : zero-width character
1221                     $s1 = process_Z($params);
1222                 } elsif ($tag eq "S") {         # S<> : non-breaking space
1223                     $s1 = process_S($params);
1224                 } elsif ($tag eq "X") {         # S<> : non-breaking space
1225                     $s1 = process_X($params);
1226                 } else {
1227                     warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1228                 }
1229
1230                 $result .= "$s1";
1231             } else {
1232                 # for pure text we must deal with implicit links and
1233                 # double-quotes among other things.
1234                 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1235                 $rest    = $s4;
1236             }
1237         }
1238     }
1239     $$text = $result;
1240 }
1241
1242 sub html_escape {
1243     my $rest = $_[0];
1244     $rest   =~ s/&/&amp;/g;
1245     $rest   =~ s/</&lt;/g;
1246     $rest   =~ s/>/&gt;/g;
1247     $rest   =~ s/"/&quot;/g;
1248     return $rest;
1249 }
1250
1251 #
1252 # process_puretext - process pure text (without pod-escapes) converting
1253 #  double-quotes and handling implicit C<> links.
1254 #
1255 sub process_puretext {
1256     my($text, $quote) = @_;
1257     my(@words, $result, $rest, $lead, $trail);
1258
1259     # convert double-quotes to single-quotes
1260     $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1261     while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1262
1263     $$quote = ($text =~ m/"/ ? 1 : 0);
1264     $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1265
1266     # keep track of leading and trailing white-space
1267     $lead  = ($text =~ /\A(\s*)/s ? $1 : "");
1268     $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1269
1270     # collapse all white space into a single space
1271     $text =~ s/\s+/ /g;
1272     @words = split(" ", $text);
1273
1274     # process each word individually
1275     foreach my $word (@words) {
1276         # see if we can infer a link
1277         if ($word =~ /^\w+\(/) {
1278             # has parenthesis so should have been a C<> ref
1279             $word = process_C($word);
1280 #           $word =~ /^[^()]*]\(/;
1281 #           if (defined $items{$1} && $items{$1}) {
1282 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1283 #                       . htmlify(0,$word)
1284 #                       . "\">$word</A></CODE>";
1285 #           } elsif (defined $items{$word} && $items{$word}) {
1286 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1287 #                       . htmlify(0,$word)
1288 #                       . "\">$word</A></CODE>";
1289 #           } else {
1290 #               $word =   "\n<CODE><A HREF=\"#item_"
1291 #                       . htmlify(0,$word)
1292 #                       . "\">$word</A></CODE>";
1293 #           }
1294         } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1295             # perl variables, should be a C<> ref
1296             $word = process_C($word, 1);
1297         } elsif ($word =~ m,^\w+://\w,) {
1298             # looks like a URL
1299             $word = qq(<A HREF="$word">$word</A>);
1300         } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1301             # looks like an e-mail address
1302             my ($w1, $w2, $w3) = ("", $word, "");
1303             ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1304             ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1305             $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1306         } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) {  # all uppercase?
1307             $word = html_escape($word) if $word =~ /["&<>]/;
1308             $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1309         } else {
1310             $word = html_escape($word) if $word =~ /["&<>]/;
1311         }
1312     }
1313
1314     # build a new string based upon our conversion
1315     $result = "";
1316     $rest   = join(" ", @words);
1317     while (length($rest) > 75) {
1318         if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1319              $rest =~ m/^(\S*)\s(.*?)$/o) {
1320
1321             $result .= "$1\n";
1322             $rest    = $2;
1323         } else {
1324             $result .= "$rest\n";
1325             $rest    = "";
1326         }
1327     }
1328     $result .= $rest if $rest;
1329
1330     # restore the leading and trailing white-space
1331     $result = "$lead$result$trail";
1332
1333     return $result;
1334 }
1335
1336 #
1337 # pre_escape - convert & in text to $amp;
1338 #
1339 sub pre_escape {
1340     my($str) = @_;
1341
1342     $$str =~ s,&,&amp;,g;
1343 }
1344
1345 #
1346 # dosify - convert filenames to 8.3
1347 #
1348 sub dosify {
1349     my($str) = @_;
1350     if ($Is83) {
1351         $str = lc $str;
1352         $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1353         $str =~ s/(\w+)/substr ($1,0,8)/ge;
1354     }
1355     return $str;
1356 }
1357
1358 #
1359 # process_L - convert a pod L<> directive to a corresponding HTML link.
1360 #  most of the links made are inferred rather than known about directly
1361 #  (i.e it's not known whether the =head\d section exists in the target file,
1362 #   or whether a .pod file exists in the case of split files).  however, the
1363 #  guessing usually works.
1364 #
1365 # Unlike the other directives, this should be called with an unprocessed
1366 # string, else tags in the link won't be matched.
1367 #
1368 sub process_L {
1369     my($str) = @_;
1370     my($s1, $s2, $linktext, $page, $page83, $section, $link);   # work strings
1371
1372     $str =~ s/\n/ /g;                   # undo word-wrapped tags
1373     $s1 = $str;
1374     for ($s1) {
1375         # LREF: a la HREF L<show this text|man/section>
1376         $linktext = $1 if s:^([^|]+)\|::;
1377
1378         # make sure sections start with a /
1379         s,^",/",g;
1380         s,^,/,g if (!m,/, && / /);
1381
1382         # check if there's a section specified
1383         if (m,^(.*?)/"?(.*?)"?$,) {     # yes
1384             ($page, $section) = ($1, $2);
1385         } else {                        # no
1386             ($page, $section) = ($str, "");
1387         }
1388
1389         # check if we know that this is a section in this page
1390         if (!defined $pages{$page} && defined $sections{$page}) {
1391             $section = $page;
1392             $page = "";
1393         }
1394     }
1395
1396     $page83=dosify($page);
1397     $page=$page83 if (defined $pages{$page83});
1398     if ($page eq "") {
1399         $link = "#" . htmlify(0,$section);
1400         $linktext = $section unless defined($linktext);
1401     } elsif ( $page =~ /::/ ) {
1402         $linktext  = ($section ? "$section" : "$page");
1403         $page =~ s,::,/,g;
1404         $link = "$htmlroot/$page.html";
1405         $link .= "#" . htmlify(0,$section) if ($section);
1406     } elsif (!defined $pages{$page}) {
1407         warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1408         $link = "";
1409         $linktext = $page unless defined($linktext);
1410     } else {
1411         $linktext  = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1412         $section = htmlify(0,$section) if $section ne "";
1413
1414         # if there is a directory by the name of the page, then assume that an
1415         # appropriate section will exist in the subdirectory
1416         if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1417             $link = "$htmlroot/$1/$section.html";
1418
1419         # since there is no directory by the name of the page, the section will
1420         # have to exist within a .html of the same name.  thus, make sure there
1421         # is a .pod or .pm that might become that .html
1422         } else {
1423             $section = "#$section";
1424             # check if there is a .pod with the page name
1425             if ($pages{$page} =~ /([^:]*)\.pod:/) {
1426                 $link = "$htmlroot/$1.html$section";
1427             } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1428                 $link = "$htmlroot/$1.html$section";
1429             } else {
1430                 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1431                              "no .pod or .pm found\n";
1432                 $link = "";
1433                 $linktext = $section unless defined($linktext);
1434             }
1435         }
1436     }
1437
1438     process_text(\$linktext, 0);
1439     if ($link) {
1440         $s1 = "<A HREF=\"$link\">$linktext</A>";
1441     } else {
1442         $s1 = "<EM>$linktext</EM>";
1443     }
1444     return $s1;
1445 }
1446
1447 #
1448 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1449 # convert them to corresponding HTML directives.
1450 #
1451 sub process_BFI {
1452     my($tag, $str) = @_;
1453     my($s1);                    # work string
1454     my(%repltext) = (   'B' => 'STRONG',
1455                         'F' => 'EM',
1456                         'I' => 'EM');
1457
1458     # extract the modified text and convert to HTML
1459     $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1460     return $s1;
1461 }
1462
1463 #
1464 # process_C - process the C<> pod-escape.
1465 #
1466 sub process_C {
1467     my($str, $doref) = @_;
1468     my($s1, $s2);
1469
1470     $s1 = $str;
1471     $s1 =~ s/\([^()]*\)//g;     # delete parentheses
1472     $s2 = $s1;
1473     $s1 =~ s/\W//g;             # delete bogus characters
1474     $str = html_escape($str);
1475
1476     # if there was a pod file that we found earlier with an appropriate
1477     # =item directive, then create a link to that page.
1478     if ($doref && defined $items{$s1}) {
1479         $s1 = ($items{$s1} ?
1480                "<A HREF=\"$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) .  "\">$str</A>" :
1481                "<A HREF=\"#item_" . htmlify(0,$s2) .  "\">$str</A>");
1482         $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1483         confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1484     } else {
1485         $s1 = "<CODE>$str</CODE>";
1486         # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1487     }
1488
1489
1490     return $s1;
1491 }
1492
1493 #
1494 # process_E - process the E<> pod directive which seems to escape a character.
1495 #
1496 sub process_E {
1497     my($str) = @_;
1498
1499     for ($str) {
1500         s,([^/].*),\&$1\;,g;
1501     }
1502
1503     return $str;
1504 }
1505
1506 #
1507 # process_Z - process the Z<> pod directive which really just amounts to
1508 # ignoring it.  this allows someone to start a paragraph with an =
1509 #
1510 sub process_Z {
1511     my($str) = @_;
1512
1513     # there is no equivalent in HTML for this so just ignore it.
1514     $str = "";
1515     return $str;
1516 }
1517
1518 #
1519 # process_S - process the S<> pod directive which means to convert all
1520 # spaces in the string to non-breaking spaces (in HTML-eze).
1521 #
1522 sub process_S {
1523     my($str) = @_;
1524
1525     # convert all spaces in the text to non-breaking spaces in HTML.
1526     $str =~ s/ /&nbsp;/g;
1527     return $str;
1528 }
1529
1530 #
1531 # process_X - this is supposed to make an index entry.  we'll just
1532 # ignore it.
1533 #
1534 sub process_X {
1535     return '';
1536 }
1537
1538
1539 #
1540 # finish_list - finish off any pending HTML lists.  this should be called
1541 # after the entire pod file has been read and converted.
1542 #
1543 sub finish_list {
1544     while ($listlevel > 0) {
1545         print HTML "</DL>\n";
1546         $listlevel--;
1547     }
1548 }
1549
1550 #
1551 # htmlify - converts a pod section specification to a suitable section
1552 # specification for HTML.  if first arg is 1, only takes 1st word.
1553 #
1554 sub htmlify {
1555     my($compact, $heading) = @_;
1556
1557     if ($compact) {
1558       $heading =~ /^(\w+)/;
1559       $heading = $1;
1560     }
1561
1562   # $heading = lc($heading);
1563   $heading =~ s/[^\w\s]/_/g;
1564   $heading =~ s/(\s+)/ /g;
1565   $heading =~ s/^\s*(.*?)\s*$/$1/s;
1566   $heading =~ s/ /_/g;
1567   $heading =~ s/\A(.{32}).*\Z/$1/s;
1568   $heading =~ s/\s+\Z//;
1569   $heading =~ s/_{2,}/_/g;
1570
1571   return $heading;
1572 }
1573
1574 BEGIN {
1575 }
1576
1577 1;