lib/Pod/Html.pm

   1 package Pod::Html;
   2
   3 use Pod::Functions;
   4 use Getopt::Long;       # package for handling command-line parameters
   5 use File::Spec::Unix;
   6 require Exporter;
   7 use vars qw($VERSION);
   8 $VERSION = 1.02;
   9 @ISA = Exporter;
  10 @EXPORT = qw(pod2html htmlify);
  11 use Cwd;
  12
  13 use Carp;
  14
  15 use locale;     # make \w work right in non-ASCII lands
  16
  17 use strict;
  18
  19 use Config;
  20
  21 =head1 NAME
  22
  23 Pod::Html - module to convert pod files to HTML
  24
  25 =head1 SYNOPSIS
  26
  27     use Pod::Html;
  28     pod2html([options]);
  29
  30 =head1 DESCRIPTION
  31
  32 Converts files from pod format (see L<perlpod>) to HTML format.  It
  33 can automatically generate indexes and cross-references, and it keeps
  34 a cache of things it knows how to cross-reference.
  35
  36 =head1 ARGUMENTS
  37
  38 Pod::Html takes the following arguments:
  39
  40 =over 4
  41
  42 =item help
  43
  44     --help
  45
  46 Displays the usage message.
  47
  48 =item htmldir
  49
  50     --htmldir=name
  51
  52 Sets the directory in which the resulting HTML file is placed.  This
  53 is used to generate relative links to other files. Not passing this
  54 causes all links to be absolute, since this is the value that tells
  55 Pod::Html the root of the documentation tree.
  56
  57 =item htmlroot
  58
  59     --htmlroot=name
  60
  61 Sets the base URL for the HTML files.  When cross-references are made,
  62 the HTML root is prepended to the URL.
  63
  64 =item infile
  65
  66     --infile=name
  67
  68 Specify the pod file to convert.  Input is taken from STDIN if no
  69 infile is specified.
  70
  71 =item outfile
  72
  73     --outfile=name
  74
  75 Specify the HTML file to create.  Output goes to STDOUT if no outfile
  76 is specified.
  77
  78 =item podroot
  79
  80     --podroot=name
  81
  82 Specify the base directory for finding library pods.
  83
  84 =item podpath
  85
  86     --podpath=name:...:name
  87
  88 Specify which subdirectories of the podroot contain pod files whose
  89 HTML converted forms can be linked-to in cross-references.
  90
  91 =item libpods
  92
  93     --libpods=name:...:name
  94
  95 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
  96
  97 =item netscape
  98
  99     --netscape
 100
 101 Use Netscape HTML directives when applicable.
 102
 103 =item nonetscape
 104
 105     --nonetscape
 106
 107 Do not use Netscape HTML directives (default).
 108
 109 =item index
 110
 111     --index
 112
 113 Generate an index at the top of the HTML file (default behaviour).
 114
 115 =item noindex
 116
 117     --noindex
 118
 119 Do not generate an index at the top of the HTML file.
 120
 121
 122 =item recurse
 123
 124     --recurse
 125
 126 Recurse into subdirectories specified in podpath (default behaviour).
 127
 128 =item norecurse
 129
 130     --norecurse
 131
 132 Do not recurse into subdirectories specified in podpath.
 133
 134 =item title
 135
 136     --title=title
 137
 138 Specify the title of the resulting HTML file.
 139
 140 =item verbose
 141
 142     --verbose
 143
 144 Display progress messages.
 145
 146 =back
 147
 148 =head1 EXAMPLE
 149
 150     pod2html("pod2html",
 151              "--podpath=lib:ext:pod:vms",
 152              "--podroot=/usr/src/perl",
 153              "--htmlroot=/perl/nmanual",
 154              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
 155              "--recurse",
 156              "--infile=foo.pod",
 157              "--outfile=/perl/nmanual/foo.html");
 158
 159 =head1 AUTHOR
 160
 161 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 162
 163 =head1 BUGS
 164
 165 Has trouble with C<> etc in = commands.
 166
 167 =head1 SEE ALSO
 168
 169 L<perlpod>
 170
 171 =head1 COPYRIGHT
 172
 173 This program is distributed under the Artistic License.
 174
 175 =cut
 176
 177 my $dircache = "pod2html-dircache";
 178 my $itemcache = "pod2html-itemcache";
 179
 180 my @begin_stack = ();           # begin/end stack
 181
 182 my @libpods = ();               # files to search for links from C<> directives
 183 my $htmlroot = "/";             # http-server base directory from which all
 184                                 #   relative paths in $podpath stem.
 185 my $htmldir = "";               # The directory to which the html pages
 186                                 # will (eventually) be written.
 187 my $htmlfile = "";              # write to stdout by default
 188 my $htmlfileurl = "" ;          # The url that other files would use to
 189                                 # refer to this file.  This is only used
 190                                 # to make relative urls that point to
 191                                 # other files.
 192 my $podfile = "";               # read from stdin by default
 193 my @podpath = ();               # list of directories containing library pods.
 194 my $podroot = ".";              # filesystem base directory from which all
 195                                 #   relative paths in $podpath stem.
 196 my $recurse = 1;                # recurse on subdirectories in $podpath.
 197 my $verbose = 0;                # not verbose by default
 198 my $doindex = 1;                # non-zero if we should generate an index
 199 my $listlevel = 0;              # current list depth
 200 my @listitem = ();              # stack of HTML commands to use when a =item is
 201                                 #   encountered.  the top of the stack is the
 202                                 #   current list.
 203 my @listdata = ();              # similar to @listitem, but for the text after
 204                                 #   an =item
 205 my @listend = ();               # similar to @listitem, but the text to use to
 206                                 #   end the list.
 207 my $ignore = 1;                 # whether or not to format text.  we don't
 208                                 #   format text until we hit our first pod
 209                                 #   directive.
 210
 211 my %items_named = ();           # for the multiples of the same item in perlfunc
 212 my @items_seen = ();
 213 my $netscape = 0;               # whether or not to use netscape directives.
 214 my $title;                      # title to give the pod(s)
 215 my $top = 1;                    # true if we are at the top of the doc.  used
 216                                 #   to prevent the first <HR> directive.
 217 my $paragraph;                  # which paragraph we're processing (used
 218                                 #   for error messages)
 219 my %pages = ();                 # associative array used to find the location
 220                                 #   of pages referenced by L<> links.
 221 my %sections = ();              # sections within this page
 222 my %items = ();                 # associative array used to find the location
 223                                 #   of =item directives referenced by C<> links
 224 my $Is83;                       # is dos with short filenames (8.3)
 225
 226 sub init_globals {
 227 $dircache = "pod2html-dircache";
 228 $itemcache = "pod2html-itemcache";
 229
 230 @begin_stack = ();              # begin/end stack
 231
 232 @libpods = ();          # files to search for links from C<> directives
 233 $htmlroot = "/";                # http-server base directory from which all
 234                                 #   relative paths in $podpath stem.
 235 $htmlfile = "";         # write to stdout by default
 236 $podfile = "";          # read from stdin by default
 237 @podpath = ();          # list of directories containing library pods.
 238 $podroot = ".";         # filesystem base directory from which all
 239                                 #   relative paths in $podpath stem.
 240 $recurse = 1;           # recurse on subdirectories in $podpath.
 241 $verbose = 0;           # not verbose by default
 242 $doindex = 1;                   # non-zero if we should generate an index
 243 $listlevel = 0;         # current list depth
 244 @listitem = ();         # stack of HTML commands to use when a =item is
 245                                 #   encountered.  the top of the stack is the
 246                                 #   current list.
 247 @listdata = ();         # similar to @listitem, but for the text after
 248                                 #   an =item
 249 @listend = ();          # similar to @listitem, but the text to use to
 250                                 #   end the list.
 251 $ignore = 1;                    # whether or not to format text.  we don't
 252                                 #   format text until we hit our first pod
 253                                 #   directive.
 254
 255 @items_seen = ();
 256 %items_named = ();
 257 $netscape = 0;          # whether or not to use netscape directives.
 258 $title = '';                    # title to give the pod(s)
 259 $top = 1;                       # true if we are at the top of the doc.  used
 260                                 #   to prevent the first <HR> directive.
 261 $paragraph = '';                        # which paragraph we're processing (used
 262                                 #   for error messages)
 263 %sections = ();         # sections within this page
 264
 265 # These are not reinitialised here but are kept as a cache.
 266 # See get_cache and related cache management code.
 267 #%pages = ();                   # associative array used to find the location
 268                                 #   of pages referenced by L<> links.
 269 #%items = ();                   # associative array used to find the location
 270                                 #   of =item directives referenced by C<> links
 271 $Is83=$^O eq 'dos';
 272 }
 273
 274 sub pod2html {
 275     local(@ARGV) = @_;
 276     local($/);
 277     local $_;
 278
 279     init_globals();
 280
 281     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 282
 283     # cache of %pages and %items from last time we ran pod2html
 284
 285     #undef $opt_help if defined $opt_help;
 286
 287     # parse the command-line parameters
 288     parse_command_line();
 289
 290     # set some variables to their default values if necessary
 291     local *POD;
 292     unless (@ARGV && $ARGV[0]) {
 293         $podfile  = "-" unless $podfile;        # stdin
 294         open(POD, "<$podfile")
 295                 || die "$0: cannot open $podfile file for input: $!\n";
 296     } else {
 297         $podfile = $ARGV[0];  # XXX: might be more filenames
 298         *POD = *ARGV;
 299     }
 300     $htmlfile = "-" unless $htmlfile;   # stdout
 301     $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
 302     $htmldir =~ s#/$## ;                # so we don't get a //
 303     if (  $htmlroot eq ''
 304        && defined( $htmldir )
 305        && $htmldir ne ''
 306        && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
 307        )
 308     {
 309         # Set the 'base' url for this file, so that we can use it
 310         # as the location from which to calculate relative links
 311         # to other files. If this is '', then absolute links will
 312         # be used throughout.
 313         $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
 314     }
 315
 316     # read the pod a paragraph at a time
 317     warn "Scanning for sections in input file(s)\n" if $verbose;
 318     $/ = "";
 319     my @poddata  = <POD>;
 320     close(POD);
 321
 322     # scan the pod for =head[1-6] directives and build an index
 323     my $index = scan_headings(\%sections, @poddata);
 324
 325     unless($index) {
 326         warn "No pod in $podfile\n" if $verbose;
 327         return;
 328     }
 329
 330     # open the output file
 331     open(HTML, ">$htmlfile")
 332             || die "$0: cannot open $htmlfile file for output: $!\n";
 333
 334     # put a title in the HTML file if one wasn't specified
 335     if ($title eq '') {
 336         TITLE_SEARCH: {
 337             for (my $i = 0; $i < @poddata; $i++) {
 338                 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 339                     for my $para ( @poddata[$i, $i+1] ) {
 340                         last TITLE_SEARCH
 341                             if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
 342                     }
 343                 }
 344
 345             }
 346         }
 347     }
 348     if (!$title and $podfile =~ /\.pod$/) {
 349         # probably a split pod so take first =head[12] as title
 350         for (my $i = 0; $i < @poddata; $i++) {
 351             last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 352         }
 353         warn "adopted '$title' as title for $podfile\n"
 354             if $verbose and $title;
 355     }
 356     if ($title) {
 357         $title =~ s/\s*\(.*\)//;
 358     } else {
 359         warn "$0: no title for $podfile";
 360         $podfile =~ /^(.*)(\.[^.\/]+)?$/;
 361         $title = ($podfile eq "-" ? 'No Title' : $1);
 362         warn "using $title" if $verbose;
 363     }
 364     print HTML <<END_OF_HEAD;
 365 <HTML>
 366 <HEAD>
 367 <TITLE>$title</TITLE>
 368 <LINK REV="made" HREF="mailto:$Config{perladmin}">
 369 </HEAD>
 370
 371 <BODY>
 372
 373 END_OF_HEAD
 374
 375     # load/reload/validate/cache %pages and %items
 376     get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
 377
 378     # scan the pod for =item directives
 379     scan_items("", \%items, @poddata);
 380
 381     # put an index at the top of the file.  note, if $doindex is 0 we
 382     # still generate an index, but surround it with an html comment.
 383     # that way some other program can extract it if desired.
 384     $index =~ s/--+/-/g;
 385     print HTML "<!-- INDEX BEGIN -->\n";
 386     print HTML "<!--\n" unless $doindex;
 387     print HTML $index;
 388     print HTML "-->\n" unless $doindex;
 389     print HTML "<!-- INDEX END -->\n\n";
 390     print HTML "<HR>\n" if $doindex;
 391
 392     # now convert this file
 393     warn "Converting input file\n" if $verbose;
 394     foreach my $i (0..$#poddata) {
 395         $_ = $poddata[$i];
 396         $paragraph = $i+1;
 397         if (/^(=.*)/s) {        # is it a pod directive?
 398             $ignore = 0;
 399             $_ = $1;
 400             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 401                 process_begin($1, $2);
 402             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 403                 process_end($1, $2);
 404             } elsif (/^=cut/) {                 # =cut
 405                 process_cut();
 406             } elsif (/^=pod/) {                 # =pod
 407                 process_pod();
 408             } else {
 409                 next if @begin_stack && $begin_stack[-1] ne 'html';
 410
 411                 if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
 412                     process_head($1, $2);
 413                 } elsif (/^=item\s*(.*\S)/sm) { # =item text
 414                     process_item($1);
 415                 } elsif (/^=over\s*(.*)/) {             # =over N
 416                     process_over();
 417                 } elsif (/^=back/) {            # =back
 418                     process_back();
 419                 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
 420                     process_for($1,$2);
 421                 } else {
 422                     /^=(\S*)\s*/;
 423                     warn "$0: $podfile: unknown pod directive '$1' in "
 424                        . "paragraph $paragraph.  ignoring.\n";
 425                 }
 426             }
 427             $top = 0;
 428         }
 429         else {
 430             next if $ignore;
 431             next if @begin_stack && $begin_stack[-1] ne 'html';
 432             my $text = $_;
 433             process_text(\$text, 1);
 434             print HTML "<P>\n$text";
 435         }
 436     }
 437
 438     # finish off any pending directives
 439     finish_list();
 440     print HTML <<END_OF_TAIL;
 441 </BODY>
 442
 443 </HTML>
 444 END_OF_TAIL
 445
 446     # close the html file
 447     close(HTML);
 448
 449     warn "Finished\n" if $verbose;
 450 }
 451
 452 ##############################################################################
 453
 454 my $usage;                      # see below
 455 sub usage {
 456     my $podfile = shift;
 457     warn "$0: $podfile: @_\n" if @_;
 458     die $usage;
 459 }
 460
 461 $usage =<<END_OF_USAGE;
 462 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 463            --podpath=<name>:...:<name> --podroot=<name>
 464            --libpods=<name>:...:<name> --recurse --verbose --index
 465            --netscape --norecurse --noindex
 466
 467   --flush      - flushes the item and directory caches.
 468   --help       - prints this message.
 469   --htmlroot   - http-server base directory from which all relative paths
 470                  in podpath stem (default is /).
 471   --index      - generate an index at the top of the resulting html
 472                  (default).
 473   --infile     - filename for the pod to convert (input taken from stdin
 474                  by default).
 475   --libpods    - colon-separated list of pages to search for =item pod
 476                  directives in as targets of C<> and implicit links (empty
 477                  by default).  note, these are not filenames, but rather
 478                  page names like those that appear in L<> links.
 479   --netscape   - will use netscape html directives when applicable.
 480   --nonetscape - will not use netscape directives (default).
 481   --outfile    - filename for the resulting html file (output sent to
 482                  stdout by default).
 483   --podpath    - colon-separated list of directories containing library
 484                  pods.  empty by default.
 485   --podroot    - filesystem base directory from which all relative paths
 486                  in podpath stem (default is .).
 487   --noindex    - don't generate an index at the top of the resulting html.
 488   --norecurse  - don't recurse on those subdirectories listed in podpath.
 489   --recurse    - recurse on those subdirectories listed in podpath
 490                  (default behavior).
 491   --title      - title that will appear in resulting html file.
 492   --verbose    - self-explanatory
 493
 494 END_OF_USAGE
 495
 496 sub parse_command_line {
 497     my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose);
 498     my $result = GetOptions(
 499                             'flush'      => \$opt_flush,
 500                             'help'       => \$opt_help,
 501                             'htmldir=s'  => \$opt_htmldir,
 502                             'htmlroot=s' => \$opt_htmlroot,
 503                             'index!'     => \$opt_index,
 504                             'infile=s'   => \$opt_infile,
 505                             'libpods=s'  => \$opt_libpods,
 506                             'netscape!'  => \$opt_netscape,
 507                             'outfile=s'  => \$opt_outfile,
 508                             'podpath=s'  => \$opt_podpath,
 509                             'podroot=s'  => \$opt_podroot,
 510                             'norecurse'  => \$opt_norecurse,
 511                             'recurse!'   => \$opt_recurse,
 512                             'title=s'    => \$opt_title,
 513                             'verbose'    => \$opt_verbose,
 514                            );
 515     usage("-", "invalid parameters") if not $result;
 516
 517     usage("-") if defined $opt_help;    # see if the user asked for help
 518     $opt_help = "";                     # just to make -w shut-up.
 519
 520     $podfile  = $opt_infile if defined $opt_infile;
 521     $htmlfile = $opt_outfile if defined $opt_outfile;
 522     $htmldir  = $opt_htmldir if defined $opt_outfile;
 523
 524     @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 525     @libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 526
 527     warn "Flushing item and directory caches\n"
 528         if $opt_verbose && defined $opt_flush;
 529     unlink($dircache, $itemcache) if defined $opt_flush;
 530
 531     $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 532     $podroot  = $opt_podroot if defined $opt_podroot;
 533
 534     $doindex  = $opt_index if defined $opt_index;
 535     $recurse  = $opt_recurse if defined $opt_recurse;
 536     $title    = $opt_title if defined $opt_title;
 537     $verbose  = defined $opt_verbose ? 1 : 0;
 538     $netscape = $opt_netscape if defined $opt_netscape;
 539 }
 540
 541
 542 my $saved_cache_key;
 543
 544 sub get_cache {
 545     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 546     my @cache_key_args = @_;
 547
 548     # A first-level cache:
 549     # Don't bother reading the cache files if they still apply
 550     # and haven't changed since we last read them.
 551
 552     my $this_cache_key = cache_key(@cache_key_args);
 553
 554     return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
 555
 556     # load the cache of %pages and %items if possible.  $tests will be
 557     # non-zero if successful.
 558     my $tests = 0;
 559     if (-f $dircache && -f $itemcache) {
 560         warn "scanning for item cache\n" if $verbose;
 561         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 562     }
 563
 564     # if we didn't succeed in loading the cache then we must (re)build
 565     #  %pages and %items.
 566     if (!$tests) {
 567         warn "scanning directories in pod-path\n" if $verbose;
 568         scan_podpath($podroot, $recurse, 0);
 569     }
 570     $saved_cache_key = cache_key(@cache_key_args);
 571 }
 572
 573 sub cache_key {
 574     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 575     return join('!', $dircache, $itemcache, $recurse,
 576         @$podpath, $podroot, stat($dircache), stat($itemcache));
 577 }
 578
 579 #
 580 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 581 #  are valid caches of %pages and %items.  if they are valid then it loads
 582 #  them and returns a non-zero value.
 583 #
 584
 585 sub load_cache {
 586     my($dircache, $itemcache, $podpath, $podroot) = @_;
 587     my($tests);
 588     local $_;
 589
 590     $tests = 0;
 591
 592     open(CACHE, "<$itemcache") ||
 593         die "$0: error opening $itemcache for reading: $!\n";
 594     $/ = "\n";
 595
 596     # is it the same podpath?
 597     $_ = <CACHE>;
 598     chomp($_);
 599     $tests++ if (join(":", @$podpath) eq $_);
 600
 601     # is it the same podroot?
 602     $_ = <CACHE>;
 603     chomp($_);
 604     $tests++ if ($podroot eq $_);
 605
 606     # load the cache if its good
 607     if ($tests != 2) {
 608         close(CACHE);
 609         return 0;
 610     }
 611
 612     warn "loading item cache\n" if $verbose;
 613     while (<CACHE>) {
 614         /(.*?) (.*)$/;
 615         $items{$1} = $2;
 616     }
 617     close(CACHE);
 618
 619     warn "scanning for directory cache\n" if $verbose;
 620     open(CACHE, "<$dircache") ||
 621         die "$0: error opening $dircache for reading: $!\n";
 622     $/ = "\n";
 623     $tests = 0;
 624
 625     # is it the same podpath?
 626     $_ = <CACHE>;
 627     chomp($_);
 628     $tests++ if (join(":", @$podpath) eq $_);
 629
 630     # is it the same podroot?
 631     $_ = <CACHE>;
 632     chomp($_);
 633     $tests++ if ($podroot eq $_);
 634
 635     # load the cache if its good
 636     if ($tests != 2) {
 637         close(CACHE);
 638         return 0;
 639     }
 640
 641     warn "loading directory cache\n" if $verbose;
 642     while (<CACHE>) {
 643         /(.*?) (.*)$/;
 644         $pages{$1} = $2;
 645     }
 646
 647     close(CACHE);
 648
 649     return 1;
 650 }
 651
 652 #
 653 # scan_podpath - scans the directories specified in @podpath for directories,
 654 #  .pod files, and .pm files.  it also scans the pod files specified in
 655 #  @libpods for =item directives.
 656 #
 657 sub scan_podpath {
 658     my($podroot, $recurse, $append) = @_;
 659     my($pwd, $dir);
 660     my($libpod, $dirname, $pod, @files, @poddata);
 661
 662     unless($append) {
 663         %items = ();
 664         %pages = ();
 665     }
 666
 667     # scan each directory listed in @podpath
 668     $pwd = getcwd();
 669     chdir($podroot)
 670         || die "$0: error changing to directory $podroot: $!\n";
 671     foreach $dir (@podpath) {
 672         scan_dir($dir, $recurse);
 673     }
 674
 675     # scan the pods listed in @libpods for =item directives
 676     foreach $libpod (@libpods) {
 677         # if the page isn't defined then we won't know where to find it
 678         # on the system.
 679         next unless defined $pages{$libpod} && $pages{$libpod};
 680
 681         # if there is a directory then use the .pod and .pm files within it.
 682         # NOTE: Only finds the first so-named directory in the tree.
 683 #       if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 684         if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
 685             #  find all the .pod and .pm files within the directory
 686             $dirname = $1;
 687             opendir(DIR, $dirname) ||
 688                 die "$0: error opening directory $dirname: $!\n";
 689             @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
 690             closedir(DIR);
 691
 692             # scan each .pod and .pm file for =item directives
 693             foreach $pod (@files) {
 694                 open(POD, "<$dirname/$pod") ||
 695                     die "$0: error opening $dirname/$pod for input: $!\n";
 696                 @poddata = <POD>;
 697                 close(POD);
 698
 699                 scan_items("$dirname/$pod", @poddata);
 700             }
 701
 702             # use the names of files as =item directives too.
 703             foreach $pod (@files) {
 704                 $pod =~ /^(.*)(\.pod|\.pm)$/;
 705                 $items{$1} = "$dirname/$1.html" if $1;
 706             }
 707         } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
 708                  $pages{$libpod} =~ /([^:]*\.pm):/) {
 709             # scan the .pod or .pm file for =item directives
 710             $pod = $1;
 711             open(POD, "<$pod") ||
 712                 die "$0: error opening $pod for input: $!\n";
 713             @poddata = <POD>;
 714             close(POD);
 715
 716             scan_items("$pod", @poddata);
 717         } else {
 718             warn "$0: shouldn't be here (line ".__LINE__."\n";
 719         }
 720     }
 721     @poddata = ();      # clean-up a bit
 722
 723     chdir($pwd)
 724         || die "$0: error changing to directory $pwd: $!\n";
 725
 726     # cache the item list for later use
 727     warn "caching items for later use\n" if $verbose;
 728     open(CACHE, ">$itemcache") ||
 729         die "$0: error open $itemcache for writing: $!\n";
 730
 731     print CACHE join(":", @podpath) . "\n$podroot\n";
 732     foreach my $key (keys %items) {
 733         print CACHE "$key $items{$key}\n";
 734     }
 735
 736     close(CACHE);
 737
 738     # cache the directory list for later use
 739     warn "caching directories for later use\n" if $verbose;
 740     open(CACHE, ">$dircache") ||
 741         die "$0: error open $dircache for writing: $!\n";
 742
 743     print CACHE join(":", @podpath) . "\n$podroot\n";
 744     foreach my $key (keys %pages) {
 745         print CACHE "$key $pages{$key}\n";
 746     }
 747
 748     close(CACHE);
 749 }
 750
 751 #
 752 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 753 #  files, and .pm files.  notes those that it finds.  this information will
 754 #  be used later in order to figure out where the pages specified in L<>
 755 #  links are on the filesystem.
 756 #
 757 sub scan_dir {
 758     my($dir, $recurse) = @_;
 759     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 760     local $_;
 761
 762     @subdirs = ();
 763     @pods = ();
 764
 765     opendir(DIR, $dir) ||
 766         die "$0: error opening directory $dir: $!\n";
 767     while (defined($_ = readdir(DIR))) {
 768         if (-d "$dir/$_" && $_ ne "." && $_ ne "..") {      # directory
 769             $pages{$_}  = "" unless defined $pages{$_};
 770             $pages{$_} .= "$dir/$_:";
 771             push(@subdirs, $_);
 772         } elsif (/\.pod$/) {                                # .pod
 773             s/\.pod$//;
 774             $pages{$_}  = "" unless defined $pages{$_};
 775             $pages{$_} .= "$dir/$_.pod:";
 776             push(@pods, "$dir/$_.pod");
 777         } elsif (/\.pm$/) {                                 # .pm
 778             s/\.pm$//;
 779             $pages{$_}  = "" unless defined $pages{$_};
 780             $pages{$_} .= "$dir/$_.pm:";
 781             push(@pods, "$dir/$_.pm");
 782         }
 783     }
 784     closedir(DIR);
 785
 786     # recurse on the subdirectories if necessary
 787     if ($recurse) {
 788         foreach my $subdir (@subdirs) {
 789             scan_dir("$dir/$subdir", $recurse);
 790         }
 791     }
 792 }
 793
 794 #
 795 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
 796 #  build an index.
 797 #
 798 sub scan_headings {
 799     my($sections, @data) = @_;
 800     my($tag, $which_head, $title, $listdepth, $index);
 801
 802     # here we need      local $ignore = 0;
 803     #  unfortunately, we can't have it, because $ignore is lexical
 804     $ignore = 0;
 805
 806     $listdepth = 0;
 807     $index = "";
 808
 809     # scan for =head directives, note their name, and build an index
 810     #  pointing to each of them.
 811     foreach my $line (@data) {
 812         if ($line =~ /^=(head)([1-6])\s+(.*)/) {
 813             ($tag,$which_head, $title) = ($1,$2,$3);
 814             chomp($title);
 815             $$sections{htmlify(0,$title)} = 1;
 816
 817             while ($which_head != $listdepth) {
 818                 if ($which_head > $listdepth) {
 819                     $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
 820                     $listdepth++;
 821                 } elsif ($which_head < $listdepth) {
 822                     $listdepth--;
 823                     $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 824                 }
 825             }
 826
 827             $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
 828                       "<A HREF=\"#" . htmlify(0,$title) . "\">" .
 829                       html_escape(process_text(\$title, 0)) . "</A>";
 830         }
 831     }
 832
 833     # finish off the lists
 834     while ($listdepth--) {
 835         $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 836     }
 837
 838     # get rid of bogus lists
 839     $index =~ s,\t*<UL>\s*</UL>\n,,g;
 840
 841     $ignore = 1;        # restore old value;
 842
 843     return $index;
 844 }
 845
 846 #
 847 # scan_items - scans the pod specified by $pod for =item directives.  we
 848 #  will use this information later on in resolving C<> links.
 849 #
 850 sub scan_items {
 851     my($pod, @poddata) = @_;
 852     my($i, $item);
 853     local $_;
 854
 855     $pod =~ s/\.pod$//;
 856     $pod .= ".html" if $pod;
 857
 858     foreach $i (0..$#poddata) {
 859         $_ = $poddata[$i];
 860
 861         # remove any formatting instructions
 862         s,[A-Z]<([^<>]*)>,$1,g;
 863
 864         # figure out what kind of item it is and get the first word of
 865         #  it's name.
 866         if (/^=item\s+(\w*)\s*.*$/s) {
 867             if ($1 eq "*") {            # bullet list
 868                 /\A=item\s+\*\s*(.*?)\s*\Z/s;
 869                 $item = $1;
 870             } elsif ($1 =~ /^\d+/) {    # numbered list
 871                 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
 872                 $item = $1;
 873             } else {
 874 #               /\A=item\s+(.*?)\s*\Z/s;
 875                 /\A=item\s+(\w*)/s;
 876                 $item = $1;
 877             }
 878
 879             $items{$item} = "$pod" if $item;
 880         }
 881     }
 882 }
 883
 884 #
 885 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
 886 #
 887 sub process_head {
 888     my($tag, $heading) = @_;
 889     my $firstword;
 890
 891     # figure out the level of the =head
 892     $tag =~ /head([1-6])/;
 893     my $level = $1;
 894
 895     # can't have a heading full of spaces and speechmarks and so on
 896     $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
 897
 898     print HTML "<P>\n" unless $listlevel;
 899     print HTML "<HR>\n" unless $listlevel || $top;
 900     print HTML "<H$level>"; # unless $listlevel;
 901     #print HTML "<H$level>" unless $listlevel;
 902     my $convert = $heading; process_text(\$convert, 0);
 903     $convert = html_escape($convert);
 904     print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
 905     print HTML "</H$level>"; # unless $listlevel;
 906     print HTML "\n";
 907 }
 908
 909 #
 910 # process_item - convert a pod item tag and convert it to HTML format.
 911 #
 912 sub process_item {
 913     my $text = $_[0];
 914     my($i, $quote, $name);
 915
 916     my $need_preamble = 0;
 917     my $this_entry;
 918
 919
 920     # lots of documents start a list without doing an =over.  this is
 921     # bad!  but, the proper thing to do seems to be to just assume
 922     # they did do an =over.  so warn them once and then continue.
 923     warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n"
 924         unless $listlevel;
 925     process_over() unless $listlevel;
 926
 927     return unless $listlevel;
 928
 929     # remove formatting instructions from the text
 930     1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
 931     pre_escape(\$text);
 932
 933     $need_preamble = $items_seen[$listlevel]++ == 0;
 934
 935     # check if this is the first =item after an =over
 936     $i = $listlevel - 1;
 937     my $need_new = $listlevel >= @listitem;
 938
 939     if ($text =~ /\A\*/) {              # bullet
 940
 941         if ($need_preamble) {
 942             push(@listend,  "</UL>");
 943             print HTML "<UL>\n";
 944         }
 945
 946         print HTML '<LI>';
 947         if ($text =~ /\A\*\s*(.+)\Z/s) {
 948             print HTML '<STRONG>';
 949             if ($items_named{$1}++) {
 950                 print HTML html_escape($1);
 951             } else {
 952                 my $name = 'item_' . htmlify(1,$1);
 953                 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
 954             }
 955             print HTML '</STRONG>';
 956         }
 957
 958     } elsif ($text =~ /\A[\d#]+/) {     # numbered list
 959
 960         if ($need_preamble) {
 961             push(@listend,  "</OL>");
 962             print HTML "<OL>\n";
 963         }
 964
 965         print HTML '<LI>';
 966         if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
 967             print HTML '<STRONG>';
 968             if ($items_named{$1}++) {
 969                 print HTML html_escape($1);
 970             } else {
 971                 my $name = 'item_' . htmlify(0,$1);
 972                 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
 973             }
 974             print HTML '</STRONG>';
 975         }
 976
 977     } else {                    # all others
 978
 979         if ($need_preamble) {
 980             push(@listend,  '</DL>');
 981             print HTML "<DL>\n";
 982         }
 983
 984         print HTML '<DT>';
 985         if ($text =~ /(\S+)/) {
 986             print HTML '<STRONG>';
 987             if ($items_named{$1}++) {
 988                 print HTML html_escape($text);
 989             } else {
 990                 my $name = 'item_' . htmlify(1,$text);
 991                 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
 992             }
 993             print HTML '</STRONG>';
 994         }
 995        print HTML '<DD>';
 996     }
 997
 998     print HTML "\n";
 999 }
1000
1001 #
1002 # process_over - process a pod over tag and start a corresponding HTML
1003 # list.
1004 #
1005 sub process_over {
1006     # start a new list
1007     $listlevel++;
1008 }
1009
1010 #
1011 # process_back - process a pod back tag and convert it to HTML format.
1012 #
1013 sub process_back {
1014     warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n"
1015         unless $listlevel;
1016     return unless $listlevel;
1017
1018     # close off the list.  note, I check to see if $listend[$listlevel] is
1019     # defined because an =item directive may have never appeared and thus
1020     # $listend[$listlevel] may have never been initialized.
1021     $listlevel--;
1022     print HTML $listend[$listlevel] if defined $listend[$listlevel];
1023     print HTML "\n";
1024
1025     # don't need the corresponding perl code anymore
1026     pop(@listitem);
1027     pop(@listdata);
1028     pop(@listend);
1029
1030     pop(@items_seen);
1031 }
1032
1033 #
1034 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
1035 #
1036 sub process_cut {
1037     $ignore = 1;
1038 }
1039
1040 #
1041 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1042 # corresponding cut.
1043 #
1044 sub process_pod {
1045     # no need to set $ignore to 0 cause the main loop did it
1046 }
1047
1048 #
1049 # process_for - process a =for pod tag.  if it's for html, split
1050 # it out verbatim, if illustration, center it, otherwise ignore it.
1051 #
1052 sub process_for {
1053     my($whom, $text) = @_;
1054     if ( $whom =~ /^(pod2)?html$/i) {
1055         print HTML $text;
1056     } elsif ($whom =~ /^illustration$/i) {
1057         1 while chomp $text;
1058         for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1059           $text .= $ext, last if -r "$text$ext";
1060         }
1061         print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1062     }
1063 }
1064
1065 #
1066 # process_begin - process a =begin pod tag.  this pushes
1067 # whom we're beginning on the begin stack.  if there's a
1068 # begin stack, we only print if it us.
1069 #
1070 sub process_begin {
1071     my($whom, $text) = @_;
1072     $whom = lc($whom);
1073     push (@begin_stack, $whom);
1074     if ( $whom =~ /^(pod2)?html$/) {
1075         print HTML $text if $text;
1076     }
1077 }
1078
1079 #
1080 # process_end - process a =end pod tag.  pop the
1081 # begin stack.  die if we're mismatched.
1082 #
1083 sub process_end {
1084     my($whom, $text) = @_;
1085     $whom = lc($whom);
1086     if ($begin_stack[-1] ne $whom ) {
1087         die "Unmatched begin/end at chunk $paragraph\n"
1088     }
1089     pop @begin_stack;
1090 }
1091
1092 #
1093 # process_text - handles plaintext that appears in the input pod file.
1094 # there may be pod commands embedded within the text so those must be
1095 # converted to html commands.
1096 #
1097 sub process_text {
1098     my($text, $escapeQuotes) = @_;
1099     my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1100     my($podcommand, $params, $tag, $quote);
1101
1102     return if $ignore;
1103
1104     $quote  = 0;                # status of double-quote conversion
1105     $result = "";
1106     $rest = $$text;
1107
1108     if ($rest =~ /^\s+/) {      # preformatted text, no pod directives
1109         $rest =~ s/\n+\Z//;
1110         $rest =~ s#.*#
1111             my $line = $&;
1112             1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1113             $line;
1114         #eg;
1115
1116         $rest   =~ s/&/&amp;/g;
1117         $rest   =~ s/</&lt;/g;
1118         $rest   =~ s/>/&gt;/g;
1119         $rest   =~ s/"/&quot;/g;
1120
1121         # try and create links for all occurrences of perl.* within
1122         # the preformatted text.
1123         $rest =~ s{
1124                     (\s*)(perl\w+)
1125                   }{
1126                     if (defined $pages{$2}) {   # is a link
1127                         qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1128                     } elsif (defined $pages{dosify($2)}) {      # is a link
1129                         qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1130                     } else {
1131                         "$1$2";
1132                     }
1133                   }xeg;
1134 #       $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1135         $rest =~ s{
1136                     (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1137                   }{
1138                     my $url ;
1139                     if ( $htmlfileurl ne '' ) {
1140                         # Here, we take advantage of the knowledge
1141                         # that $htmlfileurl ne '' implies $htmlroot eq ''.
1142                         # Since $htmlroot eq '', we need to prepend $htmldir
1143                         # on the fron of the link to get the absolute path
1144                         # of the link's target. We check for a leading '/'
1145                         # to avoid corrupting links that are #, file:, etc.
1146                         my $old_url = $3 ;
1147                         $old_url = "$htmldir$old_url"
1148                             if ( $old_url =~ m{^\/} ) ;
1149                         $url = relativize_url( "$old_url.html", $htmlfileurl );
1150 # print( "  a: [$old_url.html,$htmlfileurl,$url]\n" ) ;
1151                     }
1152                     else {
1153                         $url = "$3.html" ;
1154                     }
1155                     "$1$url" ;
1156                   }xeg;
1157
1158   # Look for embedded URLs and make them in to links.  We don't
1159   # relativize them since they are best left as the author intended.
1160   my $urls = '(' . join ('|', qw{
1161                 http
1162                 telnet
1163                 mailto
1164                 news
1165                 gopher
1166                 file
1167                 wais
1168                 ftp
1169             } )
1170         . ')';
1171
1172   my $ltrs = '\w';
1173   my $gunk = '/#~:.?+=&%@!\-';
1174   my $punc = '.:?\-';
1175   my $any  = "${ltrs}${gunk}${punc}";
1176
1177   $rest =~ s{
1178         \b                          # start at word boundary
1179         (                           # begin $1  {
1180           $urls     :               # need resource and a colon
1181           (?!:)                     # Ignore File::, among others.
1182           [$any] +?                 # followed by on or more
1183                                     #  of any valid character, but
1184                                     #  be conservative and take only
1185                                     #  what you need to....
1186         )                           # end   $1  }
1187         (?=                         # look-ahead non-consumptive assertion
1188                 [$punc]*            # either 0 or more puntuation
1189                 [^$any]             #   followed by a non-url char
1190             |                       # or else
1191                 $                   #   then end of the string
1192         )
1193       }{<A HREF="$1">$1</A>}igox;
1194
1195         $result =   "<PRE>"     # text should be as it is (verbatim)
1196                   . "$rest\n"
1197                   . "</PRE>\n";
1198     } else {                    # formatted text
1199         # parse through the string, stopping each time we find a
1200         # pod-escape.  once the string has been throughly processed
1201         # we can output it.
1202         while (length $rest) {
1203             # check to see if there are any possible pod directives in
1204             # the remaining part of the text.
1205             if ($rest =~ m/[BCEIFLSZ]</) {
1206                 warn "\$rest\t= $rest\n" unless
1207                     $rest =~ /\A
1208                            ([^<]*?)
1209                            ([BCEIFLSZ]?)
1210                            <
1211                            (.*)\Z/xs;
1212
1213                 $s1 = $1;       # pure text
1214                 $s2 = $2;       # the type of pod-escape that follows
1215                 $s3 = '<';      # '<'
1216                 $s4 = $3;       # the rest of the string
1217             } else {
1218                 $s1 = $rest;
1219                 $s2 = "";
1220                 $s3 = "";
1221                 $s4 = "";
1222             }
1223
1224             if ($s3 eq '<' && $s2) {    # a pod-escape
1225                 $result    .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1226                 $podcommand = "$s2<";
1227                 $rest       = $s4;
1228
1229                 # find the matching '>'
1230                 $match = 1;
1231                 $bf = 0;
1232                 while ($match && !$bf) {
1233                     $bf = 1;
1234                     if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1235                         $bf = 0;
1236                         $match++;
1237                         $podcommand .= $1;
1238                         $rest        = $2;
1239                     } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1240                         $bf = 0;
1241                         $match--;
1242                         $podcommand .= $1;
1243                         $rest        = $2;
1244                     }
1245                 }
1246
1247                 if ($match != 0) {
1248                     warn <<WARN;
1249 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1250 WARN
1251                     $result .= substr $podcommand, 0, 2;
1252                     $rest = substr($podcommand, 2) . $rest;
1253                     next;
1254                 }
1255
1256                 # pull out the parameters to the pod-escape
1257                 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1258                 $tag    = $1;
1259                 $params = $2;
1260
1261                 # process the text within the pod-escape so that any escapes
1262                 # which must occur do.
1263                 process_text(\$params, 0) unless $tag eq 'L';
1264
1265                 $s1 = $params;
1266                 if (!$tag || $tag eq " ") {     #  <> : no tag
1267                     $s1 = "&lt;$params&gt;";
1268                 } elsif ($tag eq "L") {         # L<> : link
1269                     $s1 = process_L($params);
1270                 } elsif ($tag eq "I" ||         # I<> : italicize text
1271                          $tag eq "B" ||         # B<> : bold text
1272                          $tag eq "F") {         # F<> : file specification
1273                     $s1 = process_BFI($tag, $params);
1274                 } elsif ($tag eq "C") {         # C<> : literal code
1275                     $s1 = process_C($params, 1);
1276                 } elsif ($tag eq "E") {         # E<> : escape
1277                     $s1 = process_E($params);
1278                 } elsif ($tag eq "Z") {         # Z<> : zero-width character
1279                     $s1 = process_Z($params);
1280                 } elsif ($tag eq "S") {         # S<> : non-breaking space
1281                     $s1 = process_S($params);
1282                 } elsif ($tag eq "X") {         # S<> : non-breaking space
1283                     $s1 = process_X($params);
1284                 } else {
1285                     warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1286                 }
1287
1288                 $result .= "$s1";
1289             } else {
1290                 # for pure text we must deal with implicit links and
1291                 # double-quotes among other things.
1292                 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1293                 $rest    = $s4;
1294             }
1295         }
1296     }
1297     $$text = $result;
1298 }
1299
1300 sub html_escape {
1301     my $rest = $_[0];
1302     $rest   =~ s/&(?!\w+;|#)/&amp;/g;   # XXX not bulletproof
1303     $rest   =~ s/</&lt;/g;
1304     $rest   =~ s/>/&gt;/g;
1305     $rest   =~ s/"/&quot;/g;
1306     return $rest;
1307 }
1308
1309 #
1310 # process_puretext - process pure text (without pod-escapes) converting
1311 #  double-quotes and handling implicit C<> links.
1312 #
1313 sub process_puretext {
1314     my($text, $quote) = @_;
1315     my(@words, $result, $rest, $lead, $trail);
1316
1317     # convert double-quotes to single-quotes
1318     $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1319     while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1320
1321     $$quote = ($text =~ m/"/ ? 1 : 0);
1322     $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1323
1324     # keep track of leading and trailing white-space
1325     $lead  = ($text =~ /\A(\s*)/s ? $1 : "");
1326     $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1327
1328     # collapse all white space into a single space
1329     $text =~ s/\s+/ /g;
1330     @words = split(" ", $text);
1331
1332     # process each word individually
1333     foreach my $word (@words) {
1334         # see if we can infer a link
1335         if ($word =~ /^\w+\(/) {
1336             # has parenthesis so should have been a C<> ref
1337             $word = process_C($word);
1338 #           $word =~ /^[^()]*]\(/;
1339 #           if (defined $items{$1} && $items{$1}) {
1340 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1341 #                       . htmlify(0,$word)
1342 #                       . "\">$word</A></CODE>";
1343 #           } elsif (defined $items{$word} && $items{$word}) {
1344 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1345 #                       . htmlify(0,$word)
1346 #                       . "\">$word</A></CODE>";
1347 #           } else {
1348 #               $word =   "\n<CODE><A HREF=\"#item_"
1349 #                       . htmlify(0,$word)
1350 #                       . "\">$word</A></CODE>";
1351 #           }
1352         } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1353             # perl variables, should be a C<> ref
1354             $word = process_C($word, 1);
1355         } elsif ($word =~ m,^\w+://\w,) {
1356             # looks like a URL
1357             # Don't relativize it: leave it as the author intended
1358             $word = qq(<A HREF="$word">$word</A>);
1359         } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1360             # looks like an e-mail address
1361             my ($w1, $w2, $w3) = ("", $word, "");
1362             ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1363             ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1364             $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1365         } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) {  # all uppercase?
1366             $word = html_escape($word) if $word =~ /["&<>]/;
1367             $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1368         } else {
1369             $word = html_escape($word) if $word =~ /["&<>]/;
1370         }
1371     }
1372
1373     # build a new string based upon our conversion
1374     $result = "";
1375     $rest   = join(" ", @words);
1376     while (length($rest) > 75) {
1377         if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1378              $rest =~ m/^(\S*)\s(.*?)$/o) {
1379
1380             $result .= "$1\n";
1381             $rest    = $2;
1382         } else {
1383             $result .= "$rest\n";
1384             $rest    = "";
1385         }
1386     }
1387     $result .= $rest if $rest;
1388
1389     # restore the leading and trailing white-space
1390     $result = "$lead$result$trail";
1391
1392     return $result;
1393 }
1394
1395 #
1396 # pre_escape - convert & in text to $amp;
1397 #
1398 sub pre_escape {
1399     my($str) = @_;
1400     $$str =~ s/&(?!\w+;|#)/&amp;/g;     # XXX not bulletproof
1401 }
1402
1403 #
1404 # dosify - convert filenames to 8.3
1405 #
1406 sub dosify {
1407     my($str) = @_;
1408     if ($Is83) {
1409         $str = lc $str;
1410         $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1411         $str =~ s/(\w+)/substr ($1,0,8)/ge;
1412     }
1413     return $str;
1414 }
1415
1416 #
1417 # process_L - convert a pod L<> directive to a corresponding HTML link.
1418 #  most of the links made are inferred rather than known about directly
1419 #  (i.e it's not known whether the =head\d section exists in the target file,
1420 #   or whether a .pod file exists in the case of split files).  however, the
1421 #  guessing usually works.
1422 #
1423 # Unlike the other directives, this should be called with an unprocessed
1424 # string, else tags in the link won't be matched.
1425 #
1426 sub process_L {
1427     my($str) = @_;
1428     my($s1, $s2, $linktext, $page, $page83, $section, $link);   # work strings
1429
1430     $str =~ s/\n/ /g;                   # undo word-wrapped tags
1431     $s1 = $str;
1432     for ($s1) {
1433         # LREF: a la HREF L<show this text|man/section>
1434         $linktext = $1 if s:^([^|]+)\|::;
1435
1436         # make sure sections start with a /
1437         s,^",/",g;
1438         s,^,/,g if (!m,/, && / /);
1439
1440         # check if there's a section specified
1441         if (m,^(.*?)/"?(.*?)"?$,) {     # yes
1442             ($page, $section) = ($1, $2);
1443         } else {                        # no
1444             ($page, $section) = ($str, "");
1445         }
1446
1447         # check if we know that this is a section in this page
1448         if (!defined $pages{$page} && defined $sections{$page}) {
1449             $section = $page;
1450             $page = "";
1451         }
1452
1453         # remove trailing punctuation, like ()
1454         $section =~ s/\W*$// ;
1455     }
1456
1457     $page83=dosify($page);
1458     $page=$page83 if (defined $pages{$page83});
1459     if ($page eq "") {
1460         $link = "#" . htmlify(0,$section);
1461         $linktext = $section unless defined($linktext);
1462     } elsif ( $page =~ /::/ ) {
1463         $linktext  = ($section ? "$section" : "$page");
1464         $page =~ s,::,/,g;
1465         # Search page cache for an entry keyed under the html page name,
1466         # then look to see what directory that page might be in.  NOTE:
1467         # this will only find one page. A better solution might be to produce
1468         # an intermediate page that is an index to all such pages.
1469         my $page_name = $page ;
1470         $page_name =~ s,^.*/,, ;
1471         if ( defined( $pages{ $page_name } ) &&
1472              $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1473            ) {
1474             $page = $1 ;
1475         }
1476         else {
1477             # NOTE: This branch assumes that all A::B pages are located in
1478             # $htmlroot/A/B.html . This is often incorrect, since they are
1479             # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1480             # analyze the contents of %pages and figure out where any
1481             # cousins of A::B are, then assume that.  So, if A::B isn't found,
1482             # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1483             # lib/A/B.pm. This is also limited, but it's an improvement.
1484             # Maybe a hints file so that the links point to the correct places
1485             # non-theless?
1486             # Also, maybe put a warn "$0: cannot resolve..." here.
1487         }
1488         $link = "$htmlroot/$page.html";
1489         $link .= "#" . htmlify(0,$section) if ($section);
1490     } elsif (!defined $pages{$page}) {
1491         warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1492         $link = "";
1493         $linktext = $page unless defined($linktext);
1494     } else {
1495         $linktext  = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1496         $section = htmlify(0,$section) if $section ne "";
1497
1498         # if there is a directory by the name of the page, then assume that an
1499         # appropriate section will exist in the subdirectory
1500 #       if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1501         if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
1502             $link = "$htmlroot/$1/$section.html";
1503
1504         # since there is no directory by the name of the page, the section will
1505         # have to exist within a .html of the same name.  thus, make sure there
1506         # is a .pod or .pm that might become that .html
1507         } else {
1508             $section = "#$section";
1509             # check if there is a .pod with the page name
1510             if ($pages{$page} =~ /([^:]*)\.pod:/) {
1511                 $link = "$htmlroot/$1.html$section";
1512             } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1513                 $link = "$htmlroot/$1.html$section";
1514             } else {
1515                 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1516                              "no .pod or .pm found\n";
1517                 $link = "";
1518                 $linktext = $section unless defined($linktext);
1519             }
1520         }
1521     }
1522
1523     process_text(\$linktext, 0);
1524     if ($link) {
1525         # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1526         # implies $htmlroot eq ''. This means that the link in question
1527         # needs a prefix of $htmldir if it begins with '/'. The test for
1528         # the initial '/' is done to avoid '#'-only links, and to allow
1529         # for other kinds of links, like file:, ftp:, etc.
1530         my $url ;
1531         if (  $htmlfileurl ne '' ) {
1532             $link = "$htmldir$link"
1533                 if ( $link =~ m{^/} ) ;
1534
1535             $url = relativize_url( $link, $htmlfileurl ) ;
1536 # print( "  b: [$link,$htmlfileurl,$url]\n" ) ;
1537         }
1538         else {
1539             $url = $link ;
1540         }
1541
1542         $s1 = "<A HREF=\"$url\">$linktext</A>";
1543     } else {
1544         $s1 = "<EM>$linktext</EM>";
1545     }
1546     return $s1;
1547 }
1548
1549 #
1550 # relativize_url - convert an absolute URL to one relative to a base URL.
1551 # Assumes both end in a filename.
1552 #
1553 sub relativize_url {
1554     my ($dest,$source) = @_ ;
1555
1556     my ($dest_volume,$dest_directory,$dest_file) =
1557         File::Spec::Unix->splitpath( $dest ) ;
1558     $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1559
1560     my ($source_volume,$source_directory,$source_file) =
1561         File::Spec::Unix->splitpath( $source ) ;
1562     $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1563
1564     my $rel_path = '' ;
1565     if ( $dest ne '' ) {
1566        $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1567     }
1568
1569     if ( $rel_path ne ''                &&
1570          substr( $rel_path, -1 ) ne '/' &&
1571          substr( $dest_file, 0, 1 ) ne '#'
1572         ) {
1573         $rel_path .= "/$dest_file" ;
1574     }
1575     else {
1576         $rel_path .= "$dest_file" ;
1577     }
1578
1579     return $rel_path ;
1580 }
1581
1582 #
1583 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1584 # convert them to corresponding HTML directives.
1585 #
1586 sub process_BFI {
1587     my($tag, $str) = @_;
1588     my($s1);                    # work string
1589     my(%repltext) = (   'B' => 'STRONG',
1590                         'F' => 'EM',
1591                         'I' => 'EM');
1592
1593     # extract the modified text and convert to HTML
1594     $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1595     return $s1;
1596 }
1597
1598 #
1599 # process_C - process the C<> pod-escape.
1600 #
1601 sub process_C {
1602     my($str, $doref) = @_;
1603     my($s1, $s2);
1604
1605     $s1 = $str;
1606     $s1 =~ s/\([^()]*\)//g;     # delete parentheses
1607     $s2 = $s1;
1608     $s1 =~ s/\W//g;             # delete bogus characters
1609     $str = html_escape($str);
1610
1611     # if there was a pod file that we found earlier with an appropriate
1612     # =item directive, then create a link to that page.
1613     if ($doref && defined $items{$s1}) {
1614         if ( $items{$s1} ) {
1615             my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ;
1616             # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1617             # implies $htmlroot eq ''.
1618             my $url ;
1619             if (  $htmlfileurl ne '' ) {
1620                 $link = "$htmldir$link" ;
1621                 $url = relativize_url( $link, $htmlfileurl ) ;
1622             }
1623             else {
1624                 $url = $link ;
1625             }
1626             $s1 = "<A HREF=\"$url\">$str</A>" ;
1627         }
1628         else {
1629             $s1 = "<A HREF=\"#item_" . htmlify(0,$s2) .  "\">$str</A>" ;
1630         }
1631         $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1632         confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1633     } else {
1634         $s1 = "<CODE>$str</CODE>";
1635         # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1636     }
1637
1638
1639     return $s1;
1640 }
1641
1642 #
1643 # process_E - process the E<> pod directive which seems to escape a character.
1644 #
1645 sub process_E {
1646     my($str) = @_;
1647
1648     for ($str) {
1649         s,([^/].*),\&$1\;,g;
1650     }
1651
1652     return $str;
1653 }
1654
1655 #
1656 # process_Z - process the Z<> pod directive which really just amounts to
1657 # ignoring it.  this allows someone to start a paragraph with an =
1658 #
1659 sub process_Z {
1660     my($str) = @_;
1661
1662     # there is no equivalent in HTML for this so just ignore it.
1663     $str = "";
1664     return $str;
1665 }
1666
1667 #
1668 # process_S - process the S<> pod directive which means to convert all
1669 # spaces in the string to non-breaking spaces (in HTML-eze).
1670 #
1671 sub process_S {
1672     my($str) = @_;
1673
1674     # convert all spaces in the text to non-breaking spaces in HTML.
1675     $str =~ s/ /&nbsp;/g;
1676     return $str;
1677 }
1678
1679 #
1680 # process_X - this is supposed to make an index entry.  we'll just
1681 # ignore it.
1682 #
1683 sub process_X {
1684     return '';
1685 }
1686
1687
1688 #
1689 # Adapted from Nick Ing-Simmons' PodToHtml package.
1690 sub relative_url {
1691     my $source_file = shift ;
1692     my $destination_file = shift;
1693
1694     my $source = URI::file->new_abs($source_file);
1695     my $uo = URI::file->new($destination_file,$source)->abs;
1696     return $uo->rel->as_string;
1697 }
1698
1699
1700 #
1701 # finish_list - finish off any pending HTML lists.  this should be called
1702 # after the entire pod file has been read and converted.
1703 #
1704 sub finish_list {
1705     while ($listlevel > 0) {
1706         print HTML "</DL>\n";
1707         $listlevel--;
1708     }
1709 }
1710
1711 #
1712 # htmlify - converts a pod section specification to a suitable section
1713 # specification for HTML.  if first arg is 1, only takes 1st word.
1714 #
1715 sub htmlify {
1716     my($compact, $heading) = @_;
1717
1718     if ($compact) {
1719       $heading =~ /^(\w+)/;
1720       $heading = $1;
1721     }
1722
1723   # $heading = lc($heading);
1724   $heading =~ s/[^\w\s]/_/g;
1725   $heading =~ s/(\s+)/ /g;
1726   $heading =~ s/^\s*(.*?)\s*$/$1/s;
1727   $heading =~ s/ /_/g;
1728   $heading =~ s/\A(.{32}).*\Z/$1/s;
1729   $heading =~ s/\s+\Z//;
1730   $heading =~ s/_{2,}/_/g;
1731
1732   return $heading;
1733 }
1734
1735 BEGIN {
1736 }
1737
1738 1;