lib/Pod/Html.pm

   1 package Pod::Html;
   2
   3 use Pod::Functions;
   4 use Getopt::Long;       # package for handling command-line parameters
   5 use File::PathConvert 0.84 ;   # Used to do relative URLs
   6 require Exporter;
   7 use vars qw($VERSION);
   8 $VERSION = 1.01;
   9 @ISA = Exporter;
  10 @EXPORT = qw(pod2html htmlify);
  11 use Cwd;
  12
  13 use Carp;
  14
  15 use locale;     # make \w work right in non-ASCII lands
  16
  17 use strict;
  18
  19 use Config;
  20
  21 =head1 NAME
  22
  23 Pod::Html - module to convert pod files to HTML
  24
  25 =head1 SYNOPSIS
  26
  27     use Pod::Html;
  28     pod2html([options]);
  29
  30 =head1 DESCRIPTION
  31
  32 Converts files from pod format (see L<perlpod>) to HTML format.  It
  33 can automatically generate indexes and cross-references, and it keeps
  34 a cache of things it knows how to cross-reference.
  35
  36 =head1 ARGUMENTS
  37
  38 Pod::Html takes the following arguments:
  39
  40 =over 4
  41
  42 =item help
  43
  44     --help
  45
  46 Displays the usage message.
  47
  48 =item htmldir
  49
  50     --htmldir=name
  51
  52 Sets the directory in which the resulting HTML file is placed.  This
  53 is used to generate relative links to other files.
  54
  55 =item htmlroot
  56
  57     --htmlroot=name
  58
  59 Sets the base URL for the HTML files.  When cross-references are made,
  60 the HTML root is prepended to the URL.
  61
  62 =item infile
  63
  64     --infile=name
  65
  66 Specify the pod file to convert.  Input is taken from STDIN if no
  67 infile is specified.
  68
  69 =item outfile
  70
  71     --outfile=name
  72
  73 Specify the HTML file to create.  Output goes to STDOUT if no outfile
  74 is specified.
  75
  76 =item podroot
  77
  78     --podroot=name
  79
  80 Specify the base directory for finding library pods.
  81
  82 =item podpath
  83
  84     --podpath=name:...:name
  85
  86 Specify which subdirectories of the podroot contain pod files whose
  87 HTML converted forms can be linked-to in cross-references.
  88
  89 =item libpods
  90
  91     --libpods=name:...:name
  92
  93 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
  94
  95 =item netscape
  96
  97     --netscape
  98
  99 Use Netscape HTML directives when applicable.
 100
 101 =item nonetscape
 102
 103     --nonetscape
 104
 105 Do not use Netscape HTML directives (default).
 106
 107 =item index
 108
 109     --index
 110
 111 Generate an index at the top of the HTML file (default behaviour).
 112
 113 =item noindex
 114
 115     --noindex
 116
 117 Do not generate an index at the top of the HTML file.
 118
 119
 120 =item recurse
 121
 122     --recurse
 123
 124 Recurse into subdirectories specified in podpath (default behaviour).
 125
 126 =item norecurse
 127
 128     --norecurse
 129
 130 Do not recurse into subdirectories specified in podpath.
 131
 132 =item title
 133
 134     --title=title
 135
 136 Specify the title of the resulting HTML file.
 137
 138 =item verbose
 139
 140     --verbose
 141
 142 Display progress messages.
 143
 144 =back
 145
 146 =head1 EXAMPLE
 147
 148     pod2html("pod2html",
 149              "--podpath=lib:ext:pod:vms",
 150              "--podroot=/usr/src/perl",
 151              "--htmlroot=/perl/nmanual",
 152              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
 153              "--recurse",
 154              "--infile=foo.pod",
 155              "--outfile=/perl/nmanual/foo.html");
 156
 157 =head1 AUTHOR
 158
 159 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 160
 161 =head1 BUGS
 162
 163 Has trouble with C<> etc in = commands.
 164
 165 =head1 SEE ALSO
 166
 167 L<perlpod>
 168
 169 =head1 COPYRIGHT
 170
 171 This program is distributed under the Artistic License.
 172
 173 =cut
 174
 175 my $dircache = "pod2html-dircache";
 176 my $itemcache = "pod2html-itemcache";
 177
 178 my @begin_stack = ();           # begin/end stack
 179
 180 my @libpods = ();               # files to search for links from C<> directives
 181 my $htmlroot = "/";             # http-server base directory from which all
 182                                 #   relative paths in $podpath stem.
 183 my $htmldir = "";               # The directory to which the html pages
 184                                 # will (eventually) be written.
 185 my $htmlfile = "";              # write to stdout by default
 186 my $htmlfileurl = "";           # The url that other files would use to
 187                                 # refer to this file.  This is only used
 188                                 # to make relative urls that point to
 189                                 # other files.
 190 my $podfile = "";               # read from stdin by default
 191 my @podpath = ();               # list of directories containing library pods.
 192 my $podroot = ".";              # filesystem base directory from which all
 193                                 #   relative paths in $podpath stem.
 194 my $recurse = 1;                # recurse on subdirectories in $podpath.
 195 my $verbose = 0;                # not verbose by default
 196 my $doindex = 1;                # non-zero if we should generate an index
 197 my $listlevel = 0;              # current list depth
 198 my @listitem = ();              # stack of HTML commands to use when a =item is
 199                                 #   encountered.  the top of the stack is the
 200                                 #   current list.
 201 my @listdata = ();              # similar to @listitem, but for the text after
 202                                 #   an =item
 203 my @listend = ();               # similar to @listitem, but the text to use to
 204                                 #   end the list.
 205 my $ignore = 1;                 # whether or not to format text.  we don't
 206                                 #   format text until we hit our first pod
 207                                 #   directive.
 208
 209 my %items_named = ();           # for the multiples of the same item in perlfunc
 210 my @items_seen = ();
 211 my $netscape = 0;               # whether or not to use netscape directives.
 212 my $title;                      # title to give the pod(s)
 213 my $top = 1;                    # true if we are at the top of the doc.  used
 214                                 #   to prevent the first <HR> directive.
 215 my $paragraph;                  # which paragraph we're processing (used
 216                                 #   for error messages)
 217 my %pages = ();                 # associative array used to find the location
 218                                 #   of pages referenced by L<> links.
 219 my %sections = ();              # sections within this page
 220 my %items = ();                 # associative array used to find the location
 221                                 #   of =item directives referenced by C<> links
 222 my $Is83;                       # is dos with short filenames (8.3)
 223
 224 sub init_globals {
 225 $dircache = "pod2html-dircache";
 226 $itemcache = "pod2html-itemcache";
 227
 228 @begin_stack = ();              # begin/end stack
 229
 230 @libpods = ();          # files to search for links from C<> directives
 231 $htmlroot = "/";                # http-server base directory from which all
 232                                 #   relative paths in $podpath stem.
 233 $htmlfile = "";         # write to stdout by default
 234 $podfile = "";          # read from stdin by default
 235 @podpath = ();          # list of directories containing library pods.
 236 $podroot = ".";         # filesystem base directory from which all
 237                                 #   relative paths in $podpath stem.
 238 $recurse = 1;           # recurse on subdirectories in $podpath.
 239 $verbose = 0;           # not verbose by default
 240 $doindex = 1;                   # non-zero if we should generate an index
 241 $listlevel = 0;         # current list depth
 242 @listitem = ();         # stack of HTML commands to use when a =item is
 243                                 #   encountered.  the top of the stack is the
 244                                 #   current list.
 245 @listdata = ();         # similar to @listitem, but for the text after
 246                                 #   an =item
 247 @listend = ();          # similar to @listitem, but the text to use to
 248                                 #   end the list.
 249 $ignore = 1;                    # whether or not to format text.  we don't
 250                                 #   format text until we hit our first pod
 251                                 #   directive.
 252
 253 @items_seen = ();
 254 %items_named = ();
 255 $netscape = 0;          # whether or not to use netscape directives.
 256 $title = '';                    # title to give the pod(s)
 257 $top = 1;                       # true if we are at the top of the doc.  used
 258                                 #   to prevent the first <HR> directive.
 259 $paragraph = '';                        # which paragraph we're processing (used
 260                                 #   for error messages)
 261 %sections = ();         # sections within this page
 262
 263 # These are not reinitialised here but are kept as a cache.
 264 # See get_cache and related cache management code.
 265 #%pages = ();                   # associative array used to find the location
 266                                 #   of pages referenced by L<> links.
 267 #%items = ();                   # associative array used to find the location
 268                                 #   of =item directives referenced by C<> links
 269 $Is83=$^O eq 'dos';
 270 }
 271
 272 sub pod2html {
 273     local(@ARGV) = @_;
 274     local($/);
 275     local $_;
 276
 277     init_globals();
 278
 279     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 280
 281     # cache of %pages and %items from last time we ran pod2html
 282
 283     #undef $opt_help if defined $opt_help;
 284
 285     # parse the command-line parameters
 286     parse_command_line();
 287
 288     # set some variables to their default values if necessary
 289     local *POD;
 290     unless (@ARGV && $ARGV[0]) {
 291         $podfile  = "-" unless $podfile;        # stdin
 292         open(POD, "<$podfile")
 293                 || die "$0: cannot open $podfile file for input: $!\n";
 294     } else {
 295         $podfile = $ARGV[0];  # XXX: might be more filenames
 296         *POD = *ARGV;
 297     }
 298     $htmlfile = "-" unless $htmlfile;   # stdout
 299     $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
 300     $htmldir =~ s#/$## ;                # so we don't get a //
 301     if (  $htmldir ne ''
 302           && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
 303        )
 304     {
 305         $htmlfileurl= "$htmlroot/" . substr( $htmlfile, length( $htmldir ) + 1 );
 306     }
 307     File::PathConvert::setfstype( 'URL' ) ;
 308
 309     # read the pod a paragraph at a time
 310     warn "Scanning for sections in input file(s)\n" if $verbose;
 311     $/ = "";
 312     my @poddata  = <POD>;
 313     close(POD);
 314
 315     # scan the pod for =head[1-6] directives and build an index
 316     my $index = scan_headings(\%sections, @poddata);
 317
 318     unless($index) {
 319         warn "No pod in $podfile\n" if $verbose;
 320         return;
 321     }
 322
 323     # open the output file
 324     open(HTML, ">$htmlfile")
 325             || die "$0: cannot open $htmlfile file for output: $!\n";
 326
 327     # put a title in the HTML file if one wasn't specified
 328     if ($title eq '') {
 329         TITLE_SEARCH: {
 330             for (my $i = 0; $i < @poddata; $i++) {
 331                 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 332                     for my $para ( @poddata[$i, $i+1] ) {
 333                         last TITLE_SEARCH
 334                             if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
 335                     }
 336                 }
 337
 338             }
 339         }
 340     }
 341     if (!$title and $podfile =~ /\.pod$/) {
 342         # probably a split pod so take first =head[12] as title
 343         for (my $i = 0; $i < @poddata; $i++) {
 344             last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 345         }
 346         warn "adopted '$title' as title for $podfile\n"
 347             if $verbose and $title;
 348     }
 349     if ($title) {
 350         $title =~ s/\s*\(.*\)//;
 351     } else {
 352         warn "$0: no title for $podfile";
 353         $podfile =~ /^(.*)(\.[^.\/]+)?$/;
 354         $title = ($podfile eq "-" ? 'No Title' : $1);
 355         warn "using $title" if $verbose;
 356     }
 357     print HTML <<END_OF_HEAD;
 358 <HTML>
 359 <HEAD>
 360 <TITLE>$title</TITLE>
 361 <LINK REV="made" HREF="mailto:$Config{perladmin}">
 362 </HEAD>
 363
 364 <BODY>
 365
 366 END_OF_HEAD
 367
 368     # load/reload/validate/cache %pages and %items
 369     get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
 370
 371     # scan the pod for =item directives
 372     scan_items("", \%items, @poddata);
 373
 374     # put an index at the top of the file.  note, if $doindex is 0 we
 375     # still generate an index, but surround it with an html comment.
 376     # that way some other program can extract it if desired.
 377     $index =~ s/--+/-/g;
 378     print HTML "<!-- INDEX BEGIN -->\n";
 379     print HTML "<!--\n" unless $doindex;
 380     print HTML $index;
 381     print HTML "-->\n" unless $doindex;
 382     print HTML "<!-- INDEX END -->\n\n";
 383     print HTML "<HR>\n" if $doindex;
 384
 385     # now convert this file
 386     warn "Converting input file\n" if $verbose;
 387     foreach my $i (0..$#poddata) {
 388         $_ = $poddata[$i];
 389         $paragraph = $i+1;
 390         if (/^(=.*)/s) {        # is it a pod directive?
 391             $ignore = 0;
 392             $_ = $1;
 393             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 394                 process_begin($1, $2);
 395             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 396                 process_end($1, $2);
 397             } elsif (/^=cut/) {                 # =cut
 398                 process_cut();
 399             } elsif (/^=pod/) {                 # =pod
 400                 process_pod();
 401             } else {
 402                 next if @begin_stack && $begin_stack[-1] ne 'html';
 403
 404                 if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
 405                     process_head($1, $2);
 406                 } elsif (/^=item\s*(.*\S)/sm) { # =item text
 407                     process_item($1);
 408                 } elsif (/^=over\s*(.*)/) {             # =over N
 409                     process_over();
 410                 } elsif (/^=back/) {            # =back
 411                     process_back();
 412                 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
 413                     process_for($1,$2);
 414                 } else {
 415                     /^=(\S*)\s*/;
 416                     warn "$0: $podfile: unknown pod directive '$1' in "
 417                        . "paragraph $paragraph.  ignoring.\n";
 418                 }
 419             }
 420             $top = 0;
 421         }
 422         else {
 423             next if $ignore;
 424             next if @begin_stack && $begin_stack[-1] ne 'html';
 425             my $text = $_;
 426             process_text(\$text, 1);
 427             print HTML "<P>\n$text";
 428         }
 429     }
 430
 431     # finish off any pending directives
 432     finish_list();
 433     print HTML <<END_OF_TAIL;
 434 </BODY>
 435
 436 </HTML>
 437 END_OF_TAIL
 438
 439     # close the html file
 440     close(HTML);
 441
 442     warn "Finished\n" if $verbose;
 443 }
 444
 445 ##############################################################################
 446
 447 my $usage;                      # see below
 448 sub usage {
 449     my $podfile = shift;
 450     warn "$0: $podfile: @_\n" if @_;
 451     die $usage;
 452 }
 453
 454 $usage =<<END_OF_USAGE;
 455 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 456            --podpath=<name>:...:<name> --podroot=<name>
 457            --libpods=<name>:...:<name> --recurse --verbose --index
 458            --netscape --norecurse --noindex
 459
 460   --flush      - flushes the item and directory caches.
 461   --help       - prints this message.
 462   --htmlroot   - http-server base directory from which all relative paths
 463                  in podpath stem (default is /).
 464   --index      - generate an index at the top of the resulting html
 465                  (default).
 466   --infile     - filename for the pod to convert (input taken from stdin
 467                  by default).
 468   --libpods    - colon-separated list of pages to search for =item pod
 469                  directives in as targets of C<> and implicit links (empty
 470                  by default).  note, these are not filenames, but rather
 471                  page names like those that appear in L<> links.
 472   --netscape   - will use netscape html directives when applicable.
 473   --nonetscape - will not use netscape directives (default).
 474   --outfile    - filename for the resulting html file (output sent to
 475                  stdout by default).
 476   --podpath    - colon-separated list of directories containing library
 477                  pods.  empty by default.
 478   --podroot    - filesystem base directory from which all relative paths
 479                  in podpath stem (default is .).
 480   --noindex    - don't generate an index at the top of the resulting html.
 481   --norecurse  - don't recurse on those subdirectories listed in podpath.
 482   --recurse    - recurse on those subdirectories listed in podpath
 483                  (default behavior).
 484   --title      - title that will appear in resulting html file.
 485   --verbose    - self-explanatory
 486
 487 END_OF_USAGE
 488
 489 sub parse_command_line {
 490     my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile
 491 ,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecur
 492 se,$opt_recurse,$opt_title,$opt_verbose);
 493     my $result = GetOptions(
 494                             'flush'      => \$opt_flush,
 495                             'help'       => \$opt_help,
 496                             'htmldir=s'  => \$opt_htmldir,
 497                             'htmlroot=s' => \$opt_htmlroot,
 498                             'index!'     => \$opt_index,
 499                             'infile=s'   => \$opt_infile,
 500                             'libpods=s'  => \$opt_libpods,
 501                             'netscape!'  => \$opt_netscape,
 502                             'outfile=s'  => \$opt_outfile,
 503                             'podpath=s'  => \$opt_podpath,
 504                             'podroot=s'  => \$opt_podroot,
 505                             'norecurse'  => \$opt_norecurse,
 506                             'recurse!'   => \$opt_recurse,
 507                             'title=s'    => \$opt_title,
 508                             'verbose'    => \$opt_verbose,
 509                            );
 510     usage("-", "invalid parameters") if not $result;
 511
 512     usage("-") if defined $opt_help;    # see if the user asked for help
 513     $opt_help = "";                     # just to make -w shut-up.
 514
 515     $podfile  = $opt_infile if defined $opt_infile;
 516     $htmlfile = $opt_outfile if defined $opt_outfile;
 517     $htmldir  = $opt_htmldir if defined $opt_outfile;
 518
 519     @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 520     @libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 521
 522     warn "Flushing item and directory caches\n"
 523         if $opt_verbose && defined $opt_flush;
 524     unlink($dircache, $itemcache) if defined $opt_flush;
 525
 526     $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 527     $podroot  = $opt_podroot if defined $opt_podroot;
 528
 529     $doindex  = $opt_index if defined $opt_index;
 530     $recurse  = $opt_recurse if defined $opt_recurse;
 531     $title    = $opt_title if defined $opt_title;
 532     $verbose  = defined $opt_verbose ? 1 : 0;
 533     $netscape = $opt_netscape if defined $opt_netscape;
 534 }
 535
 536
 537 my $saved_cache_key;
 538
 539 sub get_cache {
 540     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 541     my @cache_key_args = @_;
 542
 543     # A first-level cache:
 544     # Don't bother reading the cache files if they still apply
 545     # and haven't changed since we last read them.
 546
 547     my $this_cache_key = cache_key(@cache_key_args);
 548
 549     return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
 550
 551     # load the cache of %pages and %items if possible.  $tests will be
 552     # non-zero if successful.
 553     my $tests = 0;
 554     if (-f $dircache && -f $itemcache) {
 555         warn "scanning for item cache\n" if $verbose;
 556         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 557     }
 558
 559     # if we didn't succeed in loading the cache then we must (re)build
 560     #  %pages and %items.
 561     if (!$tests) {
 562         warn "scanning directories in pod-path\n" if $verbose;
 563         scan_podpath($podroot, $recurse, 0);
 564     }
 565     $saved_cache_key = cache_key(@cache_key_args);
 566 }
 567
 568 sub cache_key {
 569     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 570     return join('!', $dircache, $itemcache, $recurse,
 571                 @$podpath, $podroot, stat($dircache), stat($itemcache));
 572 }
 573
 574 #
 575 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 576 #  are valid caches of %pages and %items.  if they are valid then it loads
 577 #  them and returns a non-zero value.
 578 #
 579
 580 sub load_cache {
 581     my($dircache, $itemcache, $podpath, $podroot) = @_;
 582     my($tests);
 583     local $_;
 584
 585     $tests = 0;
 586
 587     open(CACHE, "<$itemcache") ||
 588         die "$0: error opening $itemcache for reading: $!\n";
 589     $/ = "\n";
 590
 591     # is it the same podpath?
 592     $_ = <CACHE>;
 593     chomp($_);
 594     $tests++ if (join(":", @$podpath) eq $_);
 595
 596     # is it the same podroot?
 597     $_ = <CACHE>;
 598     chomp($_);
 599     $tests++ if ($podroot eq $_);
 600
 601     # load the cache if its good
 602     if ($tests != 2) {
 603         close(CACHE);
 604         return 0;
 605     }
 606
 607     warn "loading item cache\n" if $verbose;
 608     while (<CACHE>) {
 609         /(.*?) (.*)$/;
 610         $items{$1} = $2;
 611     }
 612     close(CACHE);
 613
 614     warn "scanning for directory cache\n" if $verbose;
 615     open(CACHE, "<$dircache") ||
 616         die "$0: error opening $dircache for reading: $!\n";
 617     $/ = "\n";
 618     $tests = 0;
 619
 620     # is it the same podpath?
 621     $_ = <CACHE>;
 622     chomp($_);
 623     $tests++ if (join(":", @$podpath) eq $_);
 624
 625     # is it the same podroot?
 626     $_ = <CACHE>;
 627     chomp($_);
 628     $tests++ if ($podroot eq $_);
 629
 630     # load the cache if its good
 631     if ($tests != 2) {
 632         close(CACHE);
 633         return 0;
 634     }
 635
 636     warn "loading directory cache\n" if $verbose;
 637     while (<CACHE>) {
 638         /(.*?) (.*)$/;
 639         $pages{$1} = $2;
 640     }
 641
 642     close(CACHE);
 643
 644     return 1;
 645 }
 646
 647 #
 648 # scan_podpath - scans the directories specified in @podpath for directories,
 649 #  .pod files, and .pm files.  it also scans the pod files specified in
 650 #  @libpods for =item directives.
 651 #
 652 sub scan_podpath {
 653     my($podroot, $recurse, $append) = @_;
 654     my($pwd, $dir);
 655     my($libpod, $dirname, $pod, @files, @poddata);
 656
 657     unless($append) {
 658         %items = ();
 659         %pages = ();
 660     }
 661
 662     # scan each directory listed in @podpath
 663     $pwd = getcwd();
 664     chdir($podroot)
 665         || die "$0: error changing to directory $podroot: $!\n";
 666     foreach $dir (@podpath) {
 667         scan_dir($dir, $recurse);
 668     }
 669
 670     # scan the pods listed in @libpods for =item directives
 671     foreach $libpod (@libpods) {
 672         # if the page isn't defined then we won't know where to find it
 673         # on the system.
 674         next unless defined $pages{$libpod} && $pages{$libpod};
 675
 676         # if there is a directory then use the .pod and .pm files within it.
 677         if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 678             #  find all the .pod and .pm files within the directory
 679             $dirname = $1;
 680             opendir(DIR, $dirname) ||
 681                 die "$0: error opening directory $dirname: $!\n";
 682             @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
 683             closedir(DIR);
 684
 685             # scan each .pod and .pm file for =item directives
 686             foreach $pod (@files) {
 687                 open(POD, "<$dirname/$pod") ||
 688                     die "$0: error opening $dirname/$pod for input: $!\n";
 689                 @poddata = <POD>;
 690                 close(POD);
 691
 692                 scan_items("$dirname/$pod", @poddata);
 693             }
 694
 695             # use the names of files as =item directives too.
 696             foreach $pod (@files) {
 697                 $pod =~ /^(.*)(\.pod|\.pm)$/;
 698                 $items{$1} = "$dirname/$1.html" if $1;
 699             }
 700         } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
 701                  $pages{$libpod} =~ /([^:]*\.pm):/) {
 702             # scan the .pod or .pm file for =item directives
 703             $pod = $1;
 704             open(POD, "<$pod") ||
 705                 die "$0: error opening $pod for input: $!\n";
 706             @poddata = <POD>;
 707             close(POD);
 708
 709             scan_items("$pod", @poddata);
 710         } else {
 711             warn "$0: shouldn't be here (line ".__LINE__."\n";
 712         }
 713     }
 714     @poddata = ();      # clean-up a bit
 715
 716     chdir($pwd)
 717         || die "$0: error changing to directory $pwd: $!\n";
 718
 719     # cache the item list for later use
 720     warn "caching items for later use\n" if $verbose;
 721     open(CACHE, ">$itemcache") ||
 722         die "$0: error open $itemcache for writing: $!\n";
 723
 724     print CACHE join(":", @podpath) . "\n$podroot\n";
 725     foreach my $key (keys %items) {
 726         print CACHE "$key $items{$key}\n";
 727     }
 728
 729     close(CACHE);
 730
 731     # cache the directory list for later use
 732     warn "caching directories for later use\n" if $verbose;
 733     open(CACHE, ">$dircache") ||
 734         die "$0: error open $dircache for writing: $!\n";
 735
 736     print CACHE join(":", @podpath) . "\n$podroot\n";
 737     foreach my $key (keys %pages) {
 738         print CACHE "$key $pages{$key}\n";
 739     }
 740
 741     close(CACHE);
 742 }
 743
 744 #
 745 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 746 #  files, and .pm files.  notes those that it finds.  this information will
 747 #  be used later in order to figure out where the pages specified in L<>
 748 #  links are on the filesystem.
 749 #
 750 sub scan_dir {
 751     my($dir, $recurse) = @_;
 752     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 753     local $_;
 754
 755     @subdirs = ();
 756     @pods = ();
 757
 758     opendir(DIR, $dir) ||
 759         die "$0: error opening directory $dir: $!\n";
 760     while (defined($_ = readdir(DIR))) {
 761         if (-d "$dir/$_" && $_ ne "." && $_ ne "..") {      # directory
 762             $pages{$_}  = "" unless defined $pages{$_};
 763             $pages{$_} .= "$dir/$_:";
 764             push(@subdirs, $_);
 765         } elsif (/\.pod$/) {                                # .pod
 766             s/\.pod$//;
 767             $pages{$_}  = "" unless defined $pages{$_};
 768             $pages{$_} .= "$dir/$_.pod:";
 769             push(@pods, "$dir/$_.pod");
 770         } elsif (/\.pm$/) {                                 # .pm
 771             s/\.pm$//;
 772             $pages{$_}  = "" unless defined $pages{$_};
 773             $pages{$_} .= "$dir/$_.pm:";
 774             push(@pods, "$dir/$_.pm");
 775         }
 776     }
 777     closedir(DIR);
 778
 779     # recurse on the subdirectories if necessary
 780     if ($recurse) {
 781         foreach my $subdir (@subdirs) {
 782             scan_dir("$dir/$subdir", $recurse);
 783         }
 784     }
 785 }
 786
 787 #
 788 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
 789 #  build an index.
 790 #
 791 sub scan_headings {
 792     my($sections, @data) = @_;
 793     my($tag, $which_head, $title, $listdepth, $index);
 794
 795     # here we need      local $ignore = 0;
 796     #  unfortunately, we can't have it, because $ignore is lexical
 797     $ignore = 0;
 798
 799     $listdepth = 0;
 800     $index = "";
 801
 802     # scan for =head directives, note their name, and build an index
 803     #  pointing to each of them.
 804     foreach my $line (@data) {
 805         if ($line =~ /^=(head)([1-6])\s+(.*)/) {
 806             ($tag,$which_head, $title) = ($1,$2,$3);
 807             chomp($title);
 808             $$sections{htmlify(0,$title)} = 1;
 809
 810             while ($which_head != $listdepth) {
 811                 if ($which_head > $listdepth) {
 812                     $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
 813                     $listdepth++;
 814                 } elsif ($which_head < $listdepth) {
 815                     $listdepth--;
 816                     $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 817                 }
 818             }
 819
 820             $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
 821                       "<A HREF=\"#" . htmlify(0,$title) . "\">" .
 822                       html_escape(process_text(\$title, 0)) . "</A>";
 823         }
 824     }
 825
 826     # finish off the lists
 827     while ($listdepth--) {
 828         $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 829     }
 830
 831     # get rid of bogus lists
 832     $index =~ s,\t*<UL>\s*</UL>\n,,g;
 833
 834     $ignore = 1;        # restore old value;
 835
 836     return $index;
 837 }
 838
 839 #
 840 # scan_items - scans the pod specified by $pod for =item directives.  we
 841 #  will use this information later on in resolving C<> links.
 842 #
 843 sub scan_items {
 844     my($pod, @poddata) = @_;
 845     my($i, $item);
 846     local $_;
 847
 848     $pod =~ s/\.pod$//;
 849     $pod .= ".html" if $pod;
 850
 851     foreach $i (0..$#poddata) {
 852         $_ = $poddata[$i];
 853
 854         # remove any formatting instructions
 855         s,[A-Z]<([^<>]*)>,$1,g;
 856
 857         # figure out what kind of item it is and get the first word of
 858         #  it's name.
 859         if (/^=item\s+(\w*)\s*.*$/s) {
 860             if ($1 eq "*") {            # bullet list
 861                 /\A=item\s+\*\s*(.*?)\s*\Z/s;
 862                 $item = $1;
 863             } elsif ($1 =~ /^\d+/) {    # numbered list
 864                 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
 865                 $item = $1;
 866             } else {
 867 #               /\A=item\s+(.*?)\s*\Z/s;
 868                 /\A=item\s+(\w*)/s;
 869                 $item = $1;
 870             }
 871
 872             $items{$item} = "$pod" if $item;
 873         }
 874     }
 875 }
 876
 877 #
 878 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
 879 #
 880 sub process_head {
 881     my($tag, $heading) = @_;
 882     my $firstword;
 883
 884     # figure out the level of the =head
 885     $tag =~ /head([1-6])/;
 886     my $level = $1;
 887
 888     # can't have a heading full of spaces and speechmarks and so on
 889     $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
 890
 891     print HTML "<P>\n" unless $listlevel;
 892     print HTML "<HR>\n" unless $listlevel || $top;
 893     print HTML "<H$level>"; # unless $listlevel;
 894     #print HTML "<H$level>" unless $listlevel;
 895     my $convert = $heading; process_text(\$convert, 0);
 896     $convert = html_escape($convert);
 897     print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
 898     print HTML "</H$level>"; # unless $listlevel;
 899     print HTML "\n";
 900 }
 901
 902 #
 903 # process_item - convert a pod item tag and convert it to HTML format.
 904 #
 905 sub process_item {
 906     my $text = $_[0];
 907     my($i, $quote, $name);
 908
 909     my $need_preamble = 0;
 910     my $this_entry;
 911
 912
 913     # lots of documents start a list without doing an =over.  this is
 914     # bad!  but, the proper thing to do seems to be to just assume
 915     # they did do an =over.  so warn them once and then continue.
 916     warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n"
 917         unless $listlevel;
 918     process_over() unless $listlevel;
 919
 920     return unless $listlevel;
 921
 922     # remove formatting instructions from the text
 923     1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
 924     pre_escape(\$text);
 925
 926     $need_preamble = $items_seen[$listlevel]++ == 0;
 927
 928     # check if this is the first =item after an =over
 929     $i = $listlevel - 1;
 930     my $need_new = $listlevel >= @listitem;
 931
 932     if ($text =~ /\A\*/) {              # bullet
 933
 934         if ($need_preamble) {
 935             push(@listend,  "</UL>");
 936             print HTML "<UL>\n";
 937         }
 938
 939         print HTML '<LI>';
 940         if ($text =~ /\A\*\s*(.+)\Z/s) {
 941             print HTML '<STRONG>';
 942             if ($items_named{$1}++) {
 943                 print HTML html_escape($1);
 944             } else {
 945                 my $name = 'item_' . htmlify(1,$1);
 946                 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
 947             }
 948             print HTML '</STRONG>';
 949         }
 950
 951     } elsif ($text =~ /\A[\d#]+/) {     # numbered list
 952
 953         if ($need_preamble) {
 954             push(@listend,  "</OL>");
 955             print HTML "<OL>\n";
 956         }
 957
 958         print HTML '<LI>';
 959         if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
 960             print HTML '<STRONG>';
 961             if ($items_named{$1}++) {
 962                 print HTML html_escape($1);
 963             } else {
 964                 my $name = 'item_' . htmlify(0,$1);
 965                 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
 966             }
 967             print HTML '</STRONG>';
 968         }
 969
 970     } else {                    # all others
 971
 972         if ($need_preamble) {
 973             push(@listend,  '</DL>');
 974             print HTML "<DL>\n";
 975         }
 976
 977         print HTML '<DT>';
 978         if ($text =~ /(\S+)/) {
 979             print HTML '<STRONG>';
 980             if ($items_named{$1}++) {
 981                 print HTML html_escape($text);
 982             } else {
 983                 my $name = 'item_' . htmlify(1,$text);
 984                 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
 985             }
 986             print HTML '</STRONG>';
 987         }
 988        print HTML '<DD>';
 989     }
 990
 991     print HTML "\n";
 992 }
 993
 994 #
 995 # process_over - process a pod over tag and start a corresponding HTML
 996 # list.
 997 #
 998 sub process_over {
 999     # start a new list
1000     $listlevel++;
1001 }
1002
1003 #
1004 # process_back - process a pod back tag and convert it to HTML format.
1005 #
1006 sub process_back {
1007     warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n"
1008         unless $listlevel;
1009     return unless $listlevel;
1010
1011     # close off the list.  note, I check to see if $listend[$listlevel] is
1012     # defined because an =item directive may have never appeared and thus
1013     # $listend[$listlevel] may have never been initialized.
1014     $listlevel--;
1015     print HTML $listend[$listlevel] if defined $listend[$listlevel];
1016     print HTML "\n";
1017
1018     # don't need the corresponding perl code anymore
1019     pop(@listitem);
1020     pop(@listdata);
1021     pop(@listend);
1022
1023     pop(@items_seen);
1024 }
1025
1026 #
1027 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
1028 #
1029 sub process_cut {
1030     $ignore = 1;
1031 }
1032
1033 #
1034 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1035 # corresponding cut.
1036 #
1037 sub process_pod {
1038     # no need to set $ignore to 0 cause the main loop did it
1039 }
1040
1041 #
1042 # process_for - process a =for pod tag.  if it's for html, split
1043 # it out verbatim, if illustration, center it, otherwise ignore it.
1044 #
1045 sub process_for {
1046     my($whom, $text) = @_;
1047     if ( $whom =~ /^(pod2)?html$/i) {
1048         print HTML $text;
1049     } elsif ($whom =~ /^illustration$/i) {
1050         1 while chomp $text;
1051         for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1052           $text .= $ext, last if -r "$text$ext";
1053         }
1054         print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1055     }
1056 }
1057
1058 #
1059 # process_begin - process a =begin pod tag.  this pushes
1060 # whom we're beginning on the begin stack.  if there's a
1061 # begin stack, we only print if it us.
1062 #
1063 sub process_begin {
1064     my($whom, $text) = @_;
1065     $whom = lc($whom);
1066     push (@begin_stack, $whom);
1067     if ( $whom =~ /^(pod2)?html$/) {
1068         print HTML $text if $text;
1069     }
1070 }
1071
1072 #
1073 # process_end - process a =end pod tag.  pop the
1074 # begin stack.  die if we're mismatched.
1075 #
1076 sub process_end {
1077     my($whom, $text) = @_;
1078     $whom = lc($whom);
1079     if ($begin_stack[-1] ne $whom ) {
1080         die "Unmatched begin/end at chunk $paragraph\n"
1081     }
1082     pop @begin_stack;
1083 }
1084
1085 #
1086 # process_text - handles plaintext that appears in the input pod file.
1087 # there may be pod commands embedded within the text so those must be
1088 # converted to html commands.
1089 #
1090 sub process_text {
1091     my($text, $escapeQuotes) = @_;
1092     my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1093     my($podcommand, $params, $tag, $quote);
1094
1095     return if $ignore;
1096
1097     $quote  = 0;                # status of double-quote conversion
1098     $result = "";
1099     $rest = $$text;
1100
1101     if ($rest =~ /^\s+/) {      # preformatted text, no pod directives
1102         $rest =~ s/\n+\Z//;
1103         $rest =~ s#.*#
1104             my $line = $&;
1105             1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1106             $line;
1107         #eg;
1108
1109         $rest   =~ s/&/&amp;/g;
1110         $rest   =~ s/</&lt;/g;
1111         $rest   =~ s/>/&gt;/g;
1112         $rest   =~ s/"/&quot;/g;
1113
1114         # try and create links for all occurrences of perl.* within
1115         # the preformatted text.
1116         $rest =~ s{
1117                     (\s*)(perl\w+)
1118                   }{
1119                     if (defined $pages{$2}) {   # is a link
1120                         qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1121                     } elsif (defined $pages{dosify($2)}) {      # is a link
1122                         qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1123                     } else {
1124                         "$1$2";
1125                     }
1126                   }xeg;
1127 #       $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1128         $rest =~ s{
1129                     (<A\ HREF="?)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?
1130                    }{
1131                      my $url=
1132                         File::PathConvert::abs2rel( "$3.html", $htmlfileurl );
1133 #                   print( "    $htmlfileurl $3.html [$url]\n" ) ;
1134                     "$1$url" ;
1135                   }xeg;
1136
1137   # Look for embedded URLs and make them in to links.  We don't
1138   # relativize them since they are best left as the author intended.
1139   my $urls = '(' . join ('|', qw{
1140                 http
1141                 telnet
1142                 mailto
1143                 news
1144                 gopher
1145                 file
1146                 wais
1147                 ftp
1148             } )
1149         . ')';
1150
1151   my $ltrs = '\w';
1152   my $gunk = '/#~:.?+=&%@!\-';
1153   my $punc = '.:?\-';
1154   my $any  = "${ltrs}${gunk}${punc}";
1155
1156   $rest =~ s{
1157         \b                          # start at word boundary
1158         (                           # begin $1  {
1159           $urls     :[^:]           # need resource and a colon
1160           [$any] +?                 # followed by on or more
1161                                     #  of any valid character, but
1162                                     #  be conservative and take only
1163                                     #  what you need to....
1164         )                           # end   $1  }
1165         (?=                         # look-ahead non-consumptive assertion
1166                 [$punc]*            # either 0 or more puntuation
1167                 [^$any]             #   followed by a non-url char
1168             |                       # or else
1169                 $                   #   then end of the string
1170         )
1171       }{<A HREF="$1">$1</A>}igox;
1172
1173         $result =   "<PRE>"     # text should be as it is (verbatim)
1174                   . "$rest\n"
1175                   . "</PRE>\n";
1176     } else {                    # formatted text
1177         # parse through the string, stopping each time we find a
1178         # pod-escape.  once the string has been throughly processed
1179         # we can output it.
1180         while (length $rest) {
1181             # check to see if there are any possible pod directives in
1182             # the remaining part of the text.
1183             if ($rest =~ m/[BCEIFLSZ]</) {
1184                 warn "\$rest\t= $rest\n" unless
1185                     $rest =~ /\A
1186                            ([^<]*?)
1187                            ([BCEIFLSZ]?)
1188                            <
1189                            (.*)\Z/xs;
1190
1191                 $s1 = $1;       # pure text
1192                 $s2 = $2;       # the type of pod-escape that follows
1193                 $s3 = '<';      # '<'
1194                 $s4 = $3;       # the rest of the string
1195             } else {
1196                 $s1 = $rest;
1197                 $s2 = "";
1198                 $s3 = "";
1199                 $s4 = "";
1200             }
1201
1202             if ($s3 eq '<' && $s2) {    # a pod-escape
1203                 $result    .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1204                 $podcommand = "$s2<";
1205                 $rest       = $s4;
1206
1207                 # find the matching '>'
1208                 $match = 1;
1209                 $bf = 0;
1210                 while ($match && !$bf) {
1211                     $bf = 1;
1212                     if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1213                         $bf = 0;
1214                         $match++;
1215                         $podcommand .= $1;
1216                         $rest        = $2;
1217                     } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1218                         $bf = 0;
1219                         $match--;
1220                         $podcommand .= $1;
1221                         $rest        = $2;
1222                     }
1223                 }
1224
1225                 if ($match != 0) {
1226                     warn <<WARN;
1227 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1228 WARN
1229                     $result .= substr $podcommand, 0, 2;
1230                     $rest = substr($podcommand, 2) . $rest;
1231                     next;
1232                 }
1233
1234                 # pull out the parameters to the pod-escape
1235                 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1236                 $tag    = $1;
1237                 $params = $2;
1238
1239                 # process the text within the pod-escape so that any escapes
1240                 # which must occur do.
1241                 process_text(\$params, 0) unless $tag eq 'L';
1242
1243                 $s1 = $params;
1244                 if (!$tag || $tag eq " ") {     #  <> : no tag
1245                     $s1 = "&lt;$params&gt;";
1246                 } elsif ($tag eq "L") {         # L<> : link
1247                     $s1 = process_L($params);
1248                 } elsif ($tag eq "I" ||         # I<> : italicize text
1249                          $tag eq "B" ||         # B<> : bold text
1250                          $tag eq "F") {         # F<> : file specification
1251                     $s1 = process_BFI($tag, $params);
1252                 } elsif ($tag eq "C") {         # C<> : literal code
1253                     $s1 = process_C($params, 1);
1254                 } elsif ($tag eq "E") {         # E<> : escape
1255                     $s1 = process_E($params);
1256                 } elsif ($tag eq "Z") {         # Z<> : zero-width character
1257                     $s1 = process_Z($params);
1258                 } elsif ($tag eq "S") {         # S<> : non-breaking space
1259                     $s1 = process_S($params);
1260                 } elsif ($tag eq "X") {         # S<> : non-breaking space
1261                     $s1 = process_X($params);
1262                 } else {
1263                     warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1264                 }
1265
1266                 $result .= "$s1";
1267             } else {
1268                 # for pure text we must deal with implicit links and
1269                 # double-quotes among other things.
1270                 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1271                 $rest    = $s4;
1272             }
1273         }
1274     }
1275     $$text = $result;
1276 }
1277
1278 sub html_escape {
1279     my $rest = $_[0];
1280     $rest   =~ s/&/&amp;/g;
1281     $rest   =~ s/</&lt;/g;
1282     $rest   =~ s/>/&gt;/g;
1283     $rest   =~ s/"/&quot;/g;
1284     return $rest;
1285 }
1286
1287 #
1288 # process_puretext - process pure text (without pod-escapes) converting
1289 #  double-quotes and handling implicit C<> links.
1290 #
1291 sub process_puretext {
1292     my($text, $quote) = @_;
1293     my(@words, $result, $rest, $lead, $trail);
1294
1295     # convert double-quotes to single-quotes
1296     $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1297     while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1298
1299     $$quote = ($text =~ m/"/ ? 1 : 0);
1300     $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1301
1302     # keep track of leading and trailing white-space
1303     $lead  = ($text =~ /\A(\s*)/s ? $1 : "");
1304     $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1305
1306     # collapse all white space into a single space
1307     $text =~ s/\s+/ /g;
1308     @words = split(" ", $text);
1309
1310     # process each word individually
1311     foreach my $word (@words) {
1312         # see if we can infer a link
1313         if ($word =~ /^\w+\(/) {
1314             # has parenthesis so should have been a C<> ref
1315             $word = process_C($word);
1316 #           $word =~ /^[^()]*]\(/;
1317 #           if (defined $items{$1} && $items{$1}) {
1318 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1319 #                       . htmlify(0,$word)
1320 #                       . "\">$word</A></CODE>";
1321 #           } elsif (defined $items{$word} && $items{$word}) {
1322 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1323 #                       . htmlify(0,$word)
1324 #                       . "\">$word</A></CODE>";
1325 #           } else {
1326 #               $word =   "\n<CODE><A HREF=\"#item_"
1327 #                       . htmlify(0,$word)
1328 #                       . "\">$word</A></CODE>";
1329 #           }
1330         } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1331             # perl variables, should be a C<> ref
1332             $word = process_C($word, 1);
1333         } elsif ($word =~ m,^\w+://\w,) {
1334             # looks like a URL
1335             # Don't relativize it: leave it as the author intended
1336             $word = qq(<A HREF="$word">$word</A>);
1337         } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1338             # looks like an e-mail address
1339             my ($w1, $w2, $w3) = ("", $word, "");
1340             ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1341             ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1342             $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1343         } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) {  # all uppercase?
1344             $word = html_escape($word) if $word =~ /["&<>]/;
1345             $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1346         } else {
1347             $word = html_escape($word) if $word =~ /["&<>]/;
1348         }
1349     }
1350
1351     # build a new string based upon our conversion
1352     $result = "";
1353     $rest   = join(" ", @words);
1354     while (length($rest) > 75) {
1355         if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1356              $rest =~ m/^(\S*)\s(.*?)$/o) {
1357
1358             $result .= "$1\n";
1359             $rest    = $2;
1360         } else {
1361             $result .= "$rest\n";
1362             $rest    = "";
1363         }
1364     }
1365     $result .= $rest if $rest;
1366
1367     # restore the leading and trailing white-space
1368     $result = "$lead$result$trail";
1369
1370     return $result;
1371 }
1372
1373 #
1374 # pre_escape - convert & in text to $amp;
1375 #
1376 sub pre_escape {
1377     my($str) = @_;
1378
1379     $$str =~ s,&,&amp;,g;
1380 }
1381
1382 #
1383 # dosify - convert filenames to 8.3
1384 #
1385 sub dosify {
1386     my($str) = @_;
1387     if ($Is83) {
1388         $str = lc $str;
1389         $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1390         $str =~ s/(\w+)/substr ($1,0,8)/ge;
1391     }
1392     return $str;
1393 }
1394
1395 #
1396 # process_L - convert a pod L<> directive to a corresponding HTML link.
1397 #  most of the links made are inferred rather than known about directly
1398 #  (i.e it's not known whether the =head\d section exists in the target file,
1399 #   or whether a .pod file exists in the case of split files).  however, the
1400 #  guessing usually works.
1401 #
1402 # Unlike the other directives, this should be called with an unprocessed
1403 # string, else tags in the link won't be matched.
1404 #
1405 sub process_L {
1406     my($str) = @_;
1407     my($s1, $s2, $linktext, $page, $page83, $section, $link);   # work strings
1408
1409     $str =~ s/\n/ /g;                   # undo word-wrapped tags
1410     $s1 = $str;
1411     for ($s1) {
1412         # LREF: a la HREF L<show this text|man/section>
1413         $linktext = $1 if s:^([^|]+)\|::;
1414
1415         # make sure sections start with a /
1416         s,^",/",g;
1417         s,^,/,g if (!m,/, && / /);
1418
1419         # check if there's a section specified
1420         if (m,^(.*?)/"?(.*?)"?$,) {     # yes
1421             ($page, $section) = ($1, $2);
1422         } else {                        # no
1423             ($page, $section) = ($str, "");
1424         }
1425
1426         # check if we know that this is a section in this page
1427         if (!defined $pages{$page} && defined $sections{$page}) {
1428             $section = $page;
1429             $page = "";
1430         }
1431     }
1432
1433     $page83=dosify($page);
1434     $page=$page83 if (defined $pages{$page83});
1435     if ($page eq "") {
1436         $link = "#" . htmlify(0,$section);
1437         $linktext = $section unless defined($linktext);
1438     } elsif ( $page =~ /::/ ) {
1439         $linktext  = ($section ? "$section" : "$page");
1440         $page =~ s,::,/,g;
1441         $link = "$htmlroot/$page.html";
1442         $link .= "#" . htmlify(0,$section) if ($section);
1443     } elsif (!defined $pages{$page}) {
1444         warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n";
1445         $link = "";
1446         $linktext = $page unless defined($linktext);
1447     } else {
1448         $linktext  = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1449         $section = htmlify(0,$section) if $section ne "";
1450
1451         # if there is a directory by the name of the page, then assume that an
1452         # appropriate section will exist in the subdirectory
1453         if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1454             $link = "$htmlroot/$1/$section.html";
1455
1456         # since there is no directory by the name of the page, the section will
1457         # have to exist within a .html of the same name.  thus, make sure there
1458         # is a .pod or .pm that might become that .html
1459         } else {
1460             $section = "#$section";
1461             # check if there is a .pod with the page name
1462             if ($pages{$page} =~ /([^:]*)\.pod:/) {
1463                 $link = "$htmlroot/$1.html$section";
1464             } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1465                 $link = "$htmlroot/$1.html$section";
1466             } else {
1467                 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1468                              "no .pod or .pm found\n";
1469                 $link = "";
1470                 $linktext = $section unless defined($linktext);
1471             }
1472         }
1473     }
1474
1475     process_text(\$linktext, 0);
1476     if ($link) {
1477         my $url= File::PathConvert::abs2rel( $link, $htmlfileurl ) ;
1478 #        print( "    $htmlfileurl $link [$url]\n" ) ;
1479         $s1 = "<A HREF=\"$url\">$linktext</A>";
1480     } else {
1481         $s1 = "<EM>$linktext</EM>";
1482     }
1483     return $s1;
1484 }
1485
1486 #
1487 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1488 # convert them to corresponding HTML directives.
1489 #
1490 sub process_BFI {
1491     my($tag, $str) = @_;
1492     my($s1);                    # work string
1493     my(%repltext) = (   'B' => 'STRONG',
1494                         'F' => 'EM',
1495                         'I' => 'EM');
1496
1497     # extract the modified text and convert to HTML
1498     $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1499     return $s1;
1500 }
1501
1502 #
1503 # process_C - process the C<> pod-escape.
1504 #
1505 sub process_C {
1506     my($str, $doref) = @_;
1507     my($s1, $s2);
1508
1509     $s1 = $str;
1510     $s1 =~ s/\([^()]*\)//g;     # delete parentheses
1511     $s2 = $s1;
1512     $s1 =~ s/\W//g;             # delete bogus characters
1513     $str = html_escape($str);
1514
1515     # if there was a pod file that we found earlier with an appropriate
1516     # =item directive, then create a link to that page.
1517     if ($doref && defined $items{$s1}) {
1518         if ( $items{$s1} ) {
1519             my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ;
1520             my $url = File::PathConvert::abs2rel( $link, $htmlfileurl ) ;
1521 #            print( "    $htmlfileurl $link [$url]\n" ) ;
1522             $s1 = "<A HREF=\"$url\">$str</A>" ;
1523         }
1524         else {
1525             $s1 = "<A HREF=\"#item_" . htmlify(0,$s2) .  "\">$str</A>" ;
1526         }
1527         $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1528         confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1529     } else {
1530         $s1 = "<CODE>$str</CODE>";
1531         # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1532     }
1533
1534
1535     return $s1;
1536 }
1537
1538 #
1539 # process_E - process the E<> pod directive which seems to escape a character.
1540 #
1541 sub process_E {
1542     my($str) = @_;
1543
1544     for ($str) {
1545         s,([^/].*),\&$1\;,g;
1546     }
1547
1548     return $str;
1549 }
1550
1551 #
1552 # process_Z - process the Z<> pod directive which really just amounts to
1553 # ignoring it.  this allows someone to start a paragraph with an =
1554 #
1555 sub process_Z {
1556     my($str) = @_;
1557
1558     # there is no equivalent in HTML for this so just ignore it.
1559     $str = "";
1560     return $str;
1561 }
1562
1563 #
1564 # process_S - process the S<> pod directive which means to convert all
1565 # spaces in the string to non-breaking spaces (in HTML-eze).
1566 #
1567 sub process_S {
1568     my($str) = @_;
1569
1570     # convert all spaces in the text to non-breaking spaces in HTML.
1571     $str =~ s/ /&nbsp;/g;
1572     return $str;
1573 }
1574
1575 #
1576 # process_X - this is supposed to make an index entry.  we'll just
1577 # ignore it.
1578 #
1579 sub process_X {
1580     return '';
1581 }
1582
1583
1584 #
1585 # finish_list - finish off any pending HTML lists.  this should be called
1586 # after the entire pod file has been read and converted.
1587 #
1588 sub finish_list {
1589     while ($listlevel > 0) {
1590         print HTML "</DL>\n";
1591         $listlevel--;
1592     }
1593 }
1594
1595 #
1596 # htmlify - converts a pod section specification to a suitable section
1597 # specification for HTML.  if first arg is 1, only takes 1st word.
1598 #
1599 sub htmlify {
1600     my($compact, $heading) = @_;
1601
1602     if ($compact) {
1603       $heading =~ /^(\w+)/;
1604       $heading = $1;
1605     }
1606
1607   # $heading = lc($heading);
1608   $heading =~ s/[^\w\s]/_/g;
1609   $heading =~ s/(\s+)/ /g;
1610   $heading =~ s/^\s*(.*?)\s*$/$1/s;
1611   $heading =~ s/ /_/g;
1612   $heading =~ s/\A(.{32}).*\Z/$1/s;
1613   $heading =~ s/\s+\Z//;
1614   $heading =~ s/_{2,}/_/g;
1615
1616   return $heading;
1617 }
1618
1619 BEGIN {
1620 }
1621
1622 1;