lib/Pod/Html.pm

   1 package Pod::Html;
   2
   3 use Pod::Functions;
   4 use Getopt::Long;       # package for handling command-line parameters
   5 use File::Spec::Unix;
   6 require Exporter;
   7 use vars qw($VERSION);
   8 $VERSION = 1.03;
   9 @ISA = Exporter;
  10 @EXPORT = qw(pod2html htmlify);
  11 use Cwd;
  12
  13 use Carp;
  14
  15 use locale;     # make \w work right in non-ASCII lands
  16
  17 use strict;
  18
  19 use Config;
  20
  21 =head1 NAME
  22
  23 Pod::Html - module to convert pod files to HTML
  24
  25 =head1 SYNOPSIS
  26
  27     use Pod::Html;
  28     pod2html([options]);
  29
  30 =head1 DESCRIPTION
  31
  32 Converts files from pod format (see L<perlpod>) to HTML format.  It
  33 can automatically generate indexes and cross-references, and it keeps
  34 a cache of things it knows how to cross-reference.
  35
  36 =head1 ARGUMENTS
  37
  38 Pod::Html takes the following arguments:
  39
  40 =over 4
  41
  42 =item help
  43
  44     --help
  45
  46 Displays the usage message.
  47
  48 =item htmldir
  49
  50     --htmldir=name
  51
  52 Sets the directory in which the resulting HTML file is placed.  This
  53 is used to generate relative links to other files. Not passing this
  54 causes all links to be absolute, since this is the value that tells
  55 Pod::Html the root of the documentation tree.
  56
  57 =item htmlroot
  58
  59     --htmlroot=name
  60
  61 Sets the base URL for the HTML files.  When cross-references are made,
  62 the HTML root is prepended to the URL.
  63
  64 =item infile
  65
  66     --infile=name
  67
  68 Specify the pod file to convert.  Input is taken from STDIN if no
  69 infile is specified.
  70
  71 =item outfile
  72
  73     --outfile=name
  74
  75 Specify the HTML file to create.  Output goes to STDOUT if no outfile
  76 is specified.
  77
  78 =item podroot
  79
  80     --podroot=name
  81
  82 Specify the base directory for finding library pods.
  83
  84 =item podpath
  85
  86     --podpath=name:...:name
  87
  88 Specify which subdirectories of the podroot contain pod files whose
  89 HTML converted forms can be linked-to in cross-references.
  90
  91 =item libpods
  92
  93     --libpods=name:...:name
  94
  95 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
  96
  97 =item netscape
  98
  99     --netscape
 100
 101 Use Netscape HTML directives when applicable.
 102
 103 =item nonetscape
 104
 105     --nonetscape
 106
 107 Do not use Netscape HTML directives (default).
 108
 109 =item index
 110
 111     --index
 112
 113 Generate an index at the top of the HTML file (default behaviour).
 114
 115 =item noindex
 116
 117     --noindex
 118
 119 Do not generate an index at the top of the HTML file.
 120
 121
 122 =item recurse
 123
 124     --recurse
 125
 126 Recurse into subdirectories specified in podpath (default behaviour).
 127
 128 =item norecurse
 129
 130     --norecurse
 131
 132 Do not recurse into subdirectories specified in podpath.
 133
 134 =item title
 135
 136     --title=title
 137
 138 Specify the title of the resulting HTML file.
 139
 140 =item css
 141
 142     --css=stylesheet
 143
 144 Specify the URL of a cascading style sheet.
 145
 146 =item verbose
 147
 148     --verbose
 149
 150 Display progress messages.
 151
 152 =item quiet
 153
 154     --quiet
 155
 156 Don't display I<mostly harmless> warning messages.
 157
 158 =back
 159
 160 =head1 EXAMPLE
 161
 162     pod2html("pod2html",
 163              "--podpath=lib:ext:pod:vms",
 164              "--podroot=/usr/src/perl",
 165              "--htmlroot=/perl/nmanual",
 166              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
 167              "--recurse",
 168              "--infile=foo.pod",
 169              "--outfile=/perl/nmanual/foo.html");
 170
 171 =head1 ENVIRONMENT
 172
 173 Uses $Config{pod2html} to setup default options.
 174
 175 =head1 AUTHOR
 176
 177 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 178
 179 =head1 SEE ALSO
 180
 181 L<perlpod>
 182
 183 =head1 COPYRIGHT
 184
 185 This program is distributed under the Artistic License.
 186
 187 =cut
 188
 189 my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
 190 my $dircache = "pod2htmd$cache_ext";
 191 my $itemcache = "pod2htmi$cache_ext";
 192
 193 my @begin_stack = ();           # begin/end stack
 194
 195 my @libpods = ();               # files to search for links from C<> directives
 196 my $htmlroot = "/";             # http-server base directory from which all
 197                                 #   relative paths in $podpath stem.
 198 my $htmldir = "";               # The directory to which the html pages
 199                                 # will (eventually) be written.
 200 my $htmlfile = "";              # write to stdout by default
 201 my $htmlfileurl = "" ;          # The url that other files would use to
 202                                 # refer to this file.  This is only used
 203                                 # to make relative urls that point to
 204                                 # other files.
 205 my $podfile = "";               # read from stdin by default
 206 my @podpath = ();               # list of directories containing library pods.
 207 my $podroot = ".";              # filesystem base directory from which all
 208                                 #   relative paths in $podpath stem.
 209 my $css = '';                   # Cascading style sheet
 210 my $recurse = 1;                # recurse on subdirectories in $podpath.
 211 my $quiet = 0;                  # not quiet by default
 212 my $verbose = 0;                # not verbose by default
 213 my $doindex = 1;                # non-zero if we should generate an index
 214 my $listlevel = 0;              # current list depth
 215 my @listend = ();               # the text to use to end the list.
 216 my $after_lpar = 0;             # set to true after a par in an =item
 217 my $ignore = 1;                 # whether or not to format text.  we don't
 218                                 #   format text until we hit our first pod
 219                                 #   directive.
 220
 221 my %items_named = ();           # for the multiples of the same item in perlfunc
 222 my @items_seen = ();
 223 my $netscape = 0;               # whether or not to use netscape directives.
 224 my $title;                      # title to give the pod(s)
 225 my $header = 0;                 # produce block header/footer
 226 my $top = 1;                    # true if we are at the top of the doc.  used
 227                                 #   to prevent the first <HR> directive.
 228 my $paragraph;                  # which paragraph we're processing (used
 229                                 #   for error messages)
 230 my $ptQuote = 0;                # status of double-quote conversion
 231 my %pages = ();                 # associative array used to find the location
 232                                 #   of pages referenced by L<> links.
 233 my %sections = ();              # sections within this page
 234 my %items = ();                 # associative array used to find the location
 235                                 #   of =item directives referenced by C<> links
 236 my %local_items = ();           # local items - avoid destruction of %items
 237 my $Is83;                       # is dos with short filenames (8.3)
 238
 239 sub init_globals {
 240 $dircache = "pod2htmd$cache_ext";
 241 $itemcache = "pod2htmi$cache_ext";
 242
 243 @begin_stack = ();              # begin/end stack
 244
 245 @libpods = ();          # files to search for links from C<> directives
 246 $htmlroot = "/";                # http-server base directory from which all
 247                                 #   relative paths in $podpath stem.
 248 $htmlfile = "";         # write to stdout by default
 249 $podfile = "";          # read from stdin by default
 250 @podpath = ();          # list of directories containing library pods.
 251 $podroot = ".";         # filesystem base directory from which all
 252                                 #   relative paths in $podpath stem.
 253 $css = '';                   # Cascading style sheet
 254 $recurse = 1;           # recurse on subdirectories in $podpath.
 255 $quiet = 0;             # not quiet by default
 256 $verbose = 0;           # not verbose by default
 257 $doindex = 1;                   # non-zero if we should generate an index
 258 $listlevel = 0;         # current list depth
 259 @listend = ();          # the text to use to end the list.
 260 $after_lpar = 0;        # set to true after a par in an =item
 261 $ignore = 1;                    # whether or not to format text.  we don't
 262                                 #   format text until we hit our first pod
 263                                 #   directive.
 264
 265 @items_seen = ();
 266 %items_named = ();
 267 $netscape = 0;          # whether or not to use netscape directives.
 268 $header = 0;                    # produce block header/footer
 269 $title = '';                    # title to give the pod(s)
 270 $top = 1;                       # true if we are at the top of the doc.  used
 271                                 #   to prevent the first <HR> directive.
 272 $paragraph = '';                        # which paragraph we're processing (used
 273                                 #   for error messages)
 274 %sections = ();         # sections within this page
 275
 276 # These are not reinitialised here but are kept as a cache.
 277 # See get_cache and related cache management code.
 278 #%pages = ();                   # associative array used to find the location
 279                                 #   of pages referenced by L<> links.
 280 #%items = ();                   # associative array used to find the location
 281                                 #   of =item directives referenced by C<> links
 282 %local_items = ();
 283 $Is83=$^O eq 'dos';
 284 }
 285
 286 #
 287 # clean_data: global clean-up of pod data
 288 #
 289 sub clean_data($){
 290     my( $dataref ) = @_;
 291     my $i;
 292     for( $i = 0; $i <= $#$dataref; $i++ ){
 293         ${$dataref}[$i] =~ s/\s+\Z//;
 294
 295         # have a look for all-space lines
 296         if( ${$dataref}[$i] =~ /^\s+$/m ){
 297             my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
 298             splice( @$dataref, $i, 1, @chunks );
 299         }
 300     }
 301 }
 302
 303
 304 sub pod2html {
 305     local(@ARGV) = @_;
 306     local($/);
 307     local $_;
 308
 309     init_globals();
 310
 311     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 312
 313     # cache of %pages and %items from last time we ran pod2html
 314
 315     #undef $opt_help if defined $opt_help;
 316
 317     # parse the command-line parameters
 318     parse_command_line();
 319
 320     # set some variables to their default values if necessary
 321     local *POD;
 322     unless (@ARGV && $ARGV[0]) {
 323         $podfile  = "-" unless $podfile;        # stdin
 324         open(POD, "<$podfile")
 325                 || die "$0: cannot open $podfile file for input: $!\n";
 326     } else {
 327         $podfile = $ARGV[0];  # XXX: might be more filenames
 328         *POD = *ARGV;
 329     }
 330     $htmlfile = "-" unless $htmlfile;   # stdout
 331     $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
 332     $htmldir =~ s#/$## ;                # so we don't get a //
 333     if (  $htmlroot eq ''
 334        && defined( $htmldir )
 335        && $htmldir ne ''
 336        && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
 337        )
 338     {
 339         # Set the 'base' url for this file, so that we can use it
 340         # as the location from which to calculate relative links
 341         # to other files. If this is '', then absolute links will
 342         # be used throughout.
 343         $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
 344     }
 345
 346     # read the pod a paragraph at a time
 347     warn "Scanning for sections in input file(s)\n" if $verbose;
 348     $/ = "";
 349     my @poddata  = <POD>;
 350     close(POD);
 351     clean_data( \@poddata );
 352
 353     # scan the pod for =head[1-6] directives and build an index
 354     my $index = scan_headings(\%sections, @poddata);
 355
 356     unless($index) {
 357         warn "No headings in $podfile\n" if $verbose;
 358     }
 359
 360     # open the output file
 361     open(HTML, ">$htmlfile")
 362             || die "$0: cannot open $htmlfile file for output: $!\n";
 363
 364     # put a title in the HTML file if one wasn't specified
 365     if ($title eq '') {
 366         TITLE_SEARCH: {
 367             for (my $i = 0; $i < @poddata; $i++) {
 368                 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 369                     for my $para ( @poddata[$i, $i+1] ) {
 370                         last TITLE_SEARCH
 371                             if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
 372                     }
 373                 }
 374
 375             }
 376         }
 377     }
 378     if (!$title and $podfile =~ /\.pod$/) {
 379         # probably a split pod so take first =head[12] as title
 380         for (my $i = 0; $i < @poddata; $i++) {
 381             last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 382         }
 383         warn "adopted '$title' as title for $podfile\n"
 384             if $verbose and $title;
 385     }
 386     if ($title) {
 387         $title =~ s/\s*\(.*\)//;
 388     } else {
 389         warn "$0: no title for $podfile" unless $quiet;
 390         $podfile =~ /^(.*)(\.[^.\/]+)?$/;
 391         $title = ($podfile eq "-" ? 'No Title' : $1);
 392         warn "using $title" if $verbose;
 393     }
 394     my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
 395     $csslink =~ s,\\,/,g;
 396     $csslink =~ s,(/.):,$1|,;
 397
 398     my $block = $header ? <<END_OF_BLOCK : '';
 399 <TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
 400 <TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
 401 <FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
 402 </TD></TR>
 403 </TABLE>
 404 END_OF_BLOCK
 405
 406     print HTML <<END_OF_HEAD;
 407 <HTML>
 408 <HEAD>
 409 <TITLE>$title</TITLE>$csslink
 410 <LINK REV="made" HREF="mailto:$Config{perladmin}">
 411 </HEAD>
 412
 413 <BODY>
 414 $block
 415 END_OF_HEAD
 416
 417     # load/reload/validate/cache %pages and %items
 418     get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
 419
 420     # scan the pod for =item directives
 421     scan_items( \%local_items, "", @poddata);
 422
 423     # put an index at the top of the file.  note, if $doindex is 0 we
 424     # still generate an index, but surround it with an html comment.
 425     # that way some other program can extract it if desired.
 426     $index =~ s/--+/-/g;
 427     print HTML "<A NAME=\"__index__\"></A>\n";
 428     print HTML "<!-- INDEX BEGIN -->\n";
 429     print HTML "<!--\n" unless $doindex;
 430     print HTML $index;
 431     print HTML "-->\n" unless $doindex;
 432     print HTML "<!-- INDEX END -->\n\n";
 433     print HTML "<HR>\n" if $doindex and $index;
 434
 435     # now convert this file
 436     my $after_item;             # set to true after an =item
 437     warn "Converting input file $podfile\n" if $verbose;
 438     foreach my $i (0..$#poddata){
 439         $ptQuote = 0; # status of quote conversion
 440
 441         $_ = $poddata[$i];
 442         $paragraph = $i+1;
 443         if (/^(=.*)/s) {        # is it a pod directive?
 444             $ignore = 0;
 445             $after_item = 0;
 446             $_ = $1;
 447             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 448                 process_begin($1, $2);
 449             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 450                 process_end($1, $2);
 451             } elsif (/^=cut/) {                 # =cut
 452                 process_cut();
 453             } elsif (/^=pod/) {                 # =pod
 454                 process_pod();
 455             } else {
 456                 next if @begin_stack && $begin_stack[-1] ne 'html';
 457
 458                 if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
 459                     process_head( $1, $2, $doindex && $index );
 460                 } elsif (/^=item\s*(.*\S)?/sm) {        # =item text
 461                     warn "$0: $podfile: =item without bullet, number or text"
 462                        . " in paragraph $paragraph.\n" if !defined($1) or $1 eq '';
 463                     process_item( $1 );
 464                     $after_item = 1;
 465                 } elsif (/^=over\s*(.*)/) {             # =over N
 466                     process_over();
 467                 } elsif (/^=back/) {            # =back
 468                     process_back();
 469                 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
 470                     process_for($1,$2);
 471                 } else {
 472                     /^=(\S*)\s*/;
 473                     warn "$0: $podfile: unknown pod directive '$1' in "
 474                        . "paragraph $paragraph.  ignoring.\n";
 475                 }
 476             }
 477             $top = 0;
 478         }
 479         else {
 480             next if $ignore;
 481             next if @begin_stack && $begin_stack[-1] ne 'html';
 482             my $text = $_;
 483             if( $text =~ /\A\s+/ ){
 484                 process_pre( \$text );
 485                 print HTML "<PRE>\n$text</PRE>\n";
 486
 487             } else {
 488                 process_text( \$text );
 489
 490                 # experimental: check for a paragraph where all lines
 491                 # have some ...\t...\t...\n pattern
 492                 if( $text =~ /\t/ ){
 493                     my @lines = split( "\n", $text );
 494                     if( @lines > 1 ){
 495                         my $all = 2;
 496                         foreach my $line ( @lines ){
 497                             if( $line =~ /\S/ && $line !~ /\t/ ){
 498                                 $all--;
 499                                 last if $all == 0;
 500                             }
 501                         }
 502                         if( $all > 0 ){
 503                             $text =~ s/\t+/<TD>/g;
 504                             $text =~ s/^/<TR><TD>/gm;
 505                             $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
 506                                     $text . '</TABLE>';
 507                         }
 508                     }
 509                 }
 510                 ## end of experimental
 511
 512                 if( $after_item ){
 513                     print HTML "$text\n";
 514                     $after_lpar = 1;
 515                 } else {
 516                     print HTML "<P>$text</P>\n";
 517                 }
 518             }
 519             $after_item = 0;
 520         }
 521     }
 522
 523     # finish off any pending directives
 524     finish_list();
 525
 526     # link to page index
 527     print HTML "<P><A HREF=\"#__index__\"><SMALL>page index</SMALL></A></P>\n"
 528         if $doindex and $index;
 529
 530     print HTML <<END_OF_TAIL;
 531 $block
 532 </BODY>
 533
 534 </HTML>
 535 END_OF_TAIL
 536
 537     # close the html file
 538     close(HTML);
 539
 540     warn "Finished\n" if $verbose;
 541 }
 542
 543 ##############################################################################
 544
 545 my $usage;                      # see below
 546 sub usage {
 547     my $podfile = shift;
 548     warn "$0: $podfile: @_\n" if @_;
 549     die $usage;
 550 }
 551
 552 $usage =<<END_OF_USAGE;
 553 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 554            --podpath=<name>:...:<name> --podroot=<name>
 555            --libpods=<name>:...:<name> --recurse --verbose --index
 556            --netscape --norecurse --noindex
 557
 558   --flush      - flushes the item and directory caches.
 559   --help       - prints this message.
 560   --htmlroot   - http-server base directory from which all relative paths
 561                  in podpath stem (default is /).
 562   --index      - generate an index at the top of the resulting html
 563                  (default).
 564   --infile     - filename for the pod to convert (input taken from stdin
 565                  by default).
 566   --libpods    - colon-separated list of pages to search for =item pod
 567                  directives in as targets of C<> and implicit links (empty
 568                  by default).  note, these are not filenames, but rather
 569                  page names like those that appear in L<> links.
 570   --netscape   - will use netscape html directives when applicable.
 571   --nonetscape - will not use netscape directives (default).
 572   --outfile    - filename for the resulting html file (output sent to
 573                  stdout by default).
 574   --podpath    - colon-separated list of directories containing library
 575                  pods.  empty by default.
 576   --podroot    - filesystem base directory from which all relative paths
 577                  in podpath stem (default is .).
 578   --noindex    - don't generate an index at the top of the resulting html.
 579   --norecurse  - don't recurse on those subdirectories listed in podpath.
 580   --recurse    - recurse on those subdirectories listed in podpath
 581                  (default behavior).
 582   --title      - title that will appear in resulting html file.
 583   --header     - produce block header/footer
 584   --css        - stylesheet URL
 585   --verbose    - self-explanatory
 586   --quiet      - supress some benign warning messages
 587
 588 END_OF_USAGE
 589
 590 sub parse_command_line {
 591     my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
 592     unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
 593     my $result = GetOptions(
 594                             'flush'      => \$opt_flush,
 595                             'help'       => \$opt_help,
 596                             'htmldir=s'  => \$opt_htmldir,
 597                             'htmlroot=s' => \$opt_htmlroot,
 598                             'index!'     => \$opt_index,
 599                             'infile=s'   => \$opt_infile,
 600                             'libpods=s'  => \$opt_libpods,
 601                             'netscape!'  => \$opt_netscape,
 602                             'outfile=s'  => \$opt_outfile,
 603                             'podpath=s'  => \$opt_podpath,
 604                             'podroot=s'  => \$opt_podroot,
 605                             'recurse!'   => \$opt_recurse,
 606                             'title=s'    => \$opt_title,
 607                             'header'     => \$opt_header,
 608                             'css=s'      => \$opt_css,
 609                             'verbose'    => \$opt_verbose,
 610                             'quiet'      => \$opt_quiet,
 611                            );
 612     usage("-", "invalid parameters") if not $result;
 613
 614     usage("-") if defined $opt_help;    # see if the user asked for help
 615     $opt_help = "";                     # just to make -w shut-up.
 616
 617     $podfile  = $opt_infile if defined $opt_infile;
 618     $htmlfile = $opt_outfile if defined $opt_outfile;
 619     $htmldir  = $opt_htmldir if defined $opt_outfile;
 620
 621     @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 622     @libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 623
 624     warn "Flushing item and directory caches\n"
 625         if $opt_verbose && defined $opt_flush;
 626     unlink($dircache, $itemcache) if defined $opt_flush;
 627
 628     $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 629     $podroot  = $opt_podroot if defined $opt_podroot;
 630
 631     $doindex  = $opt_index if defined $opt_index;
 632     $recurse  = $opt_recurse if defined $opt_recurse;
 633     $title    = $opt_title if defined $opt_title;
 634     $header   = defined $opt_header ? 1 : 0;
 635     $css      = $opt_css if defined $opt_css;
 636     $verbose  = defined $opt_verbose ? 1 : 0;
 637     $quiet    = defined $opt_quiet ? 1 : 0;
 638     $netscape = $opt_netscape if defined $opt_netscape;
 639 }
 640
 641
 642 my $saved_cache_key;
 643
 644 sub get_cache {
 645     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 646     my @cache_key_args = @_;
 647
 648     # A first-level cache:
 649     # Don't bother reading the cache files if they still apply
 650     # and haven't changed since we last read them.
 651
 652     my $this_cache_key = cache_key(@cache_key_args);
 653
 654     return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
 655
 656     # load the cache of %pages and %items if possible.  $tests will be
 657     # non-zero if successful.
 658     my $tests = 0;
 659     if (-f $dircache && -f $itemcache) {
 660         warn "scanning for item cache\n" if $verbose;
 661         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 662     }
 663
 664     # if we didn't succeed in loading the cache then we must (re)build
 665     #  %pages and %items.
 666     if (!$tests) {
 667         warn "scanning directories in pod-path\n" if $verbose;
 668         scan_podpath($podroot, $recurse, 0);
 669     }
 670     $saved_cache_key = cache_key(@cache_key_args);
 671 }
 672
 673 sub cache_key {
 674     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 675     return join('!', $dircache, $itemcache, $recurse,
 676         @$podpath, $podroot, stat($dircache), stat($itemcache));
 677 }
 678
 679 #
 680 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 681 #  are valid caches of %pages and %items.  if they are valid then it loads
 682 #  them and returns a non-zero value.
 683 #
 684 sub load_cache {
 685     my($dircache, $itemcache, $podpath, $podroot) = @_;
 686     my($tests);
 687     local $_;
 688
 689     $tests = 0;
 690
 691     open(CACHE, "<$itemcache") ||
 692         die "$0: error opening $itemcache for reading: $!\n";
 693     $/ = "\n";
 694
 695     # is it the same podpath?
 696     $_ = <CACHE>;
 697     chomp($_);
 698     $tests++ if (join(":", @$podpath) eq $_);
 699
 700     # is it the same podroot?
 701     $_ = <CACHE>;
 702     chomp($_);
 703     $tests++ if ($podroot eq $_);
 704
 705     # load the cache if its good
 706     if ($tests != 2) {
 707         close(CACHE);
 708         return 0;
 709     }
 710
 711     warn "loading item cache\n" if $verbose;
 712     while (<CACHE>) {
 713         /(.*?) (.*)$/;
 714         $items{$1} = $2;
 715     }
 716     close(CACHE);
 717
 718     warn "scanning for directory cache\n" if $verbose;
 719     open(CACHE, "<$dircache") ||
 720         die "$0: error opening $dircache for reading: $!\n";
 721     $/ = "\n";
 722     $tests = 0;
 723
 724     # is it the same podpath?
 725     $_ = <CACHE>;
 726     chomp($_);
 727     $tests++ if (join(":", @$podpath) eq $_);
 728
 729     # is it the same podroot?
 730     $_ = <CACHE>;
 731     chomp($_);
 732     $tests++ if ($podroot eq $_);
 733
 734     # load the cache if its good
 735     if ($tests != 2) {
 736         close(CACHE);
 737         return 0;
 738     }
 739
 740     warn "loading directory cache\n" if $verbose;
 741     while (<CACHE>) {
 742         /(.*?) (.*)$/;
 743         $pages{$1} = $2;
 744     }
 745
 746     close(CACHE);
 747
 748     return 1;
 749 }
 750
 751 #
 752 # scan_podpath - scans the directories specified in @podpath for directories,
 753 #  .pod files, and .pm files.  it also scans the pod files specified in
 754 #  @libpods for =item directives.
 755 #
 756 sub scan_podpath {
 757     my($podroot, $recurse, $append) = @_;
 758     my($pwd, $dir);
 759     my($libpod, $dirname, $pod, @files, @poddata);
 760
 761     unless($append) {
 762         %items = ();
 763         %pages = ();
 764     }
 765
 766     # scan each directory listed in @podpath
 767     $pwd = getcwd();
 768     chdir($podroot)
 769         || die "$0: error changing to directory $podroot: $!\n";
 770     foreach $dir (@podpath) {
 771         scan_dir($dir, $recurse);
 772     }
 773
 774     # scan the pods listed in @libpods for =item directives
 775     foreach $libpod (@libpods) {
 776         # if the page isn't defined then we won't know where to find it
 777         # on the system.
 778         next unless defined $pages{$libpod} && $pages{$libpod};
 779
 780         # if there is a directory then use the .pod and .pm files within it.
 781         # NOTE: Only finds the first so-named directory in the tree.
 782 #       if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 783         if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
 784             #  find all the .pod and .pm files within the directory
 785             $dirname = $1;
 786             opendir(DIR, $dirname) ||
 787                 die "$0: error opening directory $dirname: $!\n";
 788             @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
 789             closedir(DIR);
 790
 791             # scan each .pod and .pm file for =item directives
 792             foreach $pod (@files) {
 793                 open(POD, "<$dirname/$pod") ||
 794                     die "$0: error opening $dirname/$pod for input: $!\n";
 795                 @poddata = <POD>;
 796                 close(POD);
 797                 clean_data( \@poddata );
 798
 799                 scan_items( \%items, "$dirname/$pod", @poddata);
 800             }
 801
 802             # use the names of files as =item directives too.
 803 ### Don't think this should be done this way - confuses issues.(WL)
 804 ###         foreach $pod (@files) {
 805 ###             $pod =~ /^(.*)(\.pod|\.pm)$/;
 806 ###             $items{$1} = "$dirname/$1.html" if $1;
 807 ###         }
 808         } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
 809                  $pages{$libpod} =~ /([^:]*\.pm):/) {
 810             # scan the .pod or .pm file for =item directives
 811             $pod = $1;
 812             open(POD, "<$pod") ||
 813                 die "$0: error opening $pod for input: $!\n";
 814             @poddata = <POD>;
 815             close(POD);
 816             clean_data( \@poddata );
 817
 818             scan_items( \%items, "$pod", @poddata);
 819         } else {
 820             warn "$0: shouldn't be here (line ".__LINE__."\n";
 821         }
 822     }
 823     @poddata = ();      # clean-up a bit
 824
 825     chdir($pwd)
 826         || die "$0: error changing to directory $pwd: $!\n";
 827
 828     # cache the item list for later use
 829     warn "caching items for later use\n" if $verbose;
 830     open(CACHE, ">$itemcache") ||
 831         die "$0: error open $itemcache for writing: $!\n";
 832
 833     print CACHE join(":", @podpath) . "\n$podroot\n";
 834     foreach my $key (keys %items) {
 835         print CACHE "$key $items{$key}\n";
 836     }
 837
 838     close(CACHE);
 839
 840     # cache the directory list for later use
 841     warn "caching directories for later use\n" if $verbose;
 842     open(CACHE, ">$dircache") ||
 843         die "$0: error open $dircache for writing: $!\n";
 844
 845     print CACHE join(":", @podpath) . "\n$podroot\n";
 846     foreach my $key (keys %pages) {
 847         print CACHE "$key $pages{$key}\n";
 848     }
 849
 850     close(CACHE);
 851 }
 852
 853 #
 854 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 855 #  files, and .pm files.  notes those that it finds.  this information will
 856 #  be used later in order to figure out where the pages specified in L<>
 857 #  links are on the filesystem.
 858 #
 859 sub scan_dir {
 860     my($dir, $recurse) = @_;
 861     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 862     local $_;
 863
 864     @subdirs = ();
 865     @pods = ();
 866
 867     opendir(DIR, $dir) ||
 868         die "$0: error opening directory $dir: $!\n";
 869     while (defined($_ = readdir(DIR))) {
 870         if (-d "$dir/$_" && $_ ne "." && $_ ne "..") {      # directory
 871             $pages{$_}  = "" unless defined $pages{$_};
 872             $pages{$_} .= "$dir/$_:";
 873             push(@subdirs, $_);
 874         } elsif (/\.pod$/) {                                # .pod
 875             s/\.pod$//;
 876             $pages{$_}  = "" unless defined $pages{$_};
 877             $pages{$_} .= "$dir/$_.pod:";
 878             push(@pods, "$dir/$_.pod");
 879         } elsif (/\.pm$/) {                                 # .pm
 880             s/\.pm$//;
 881             $pages{$_}  = "" unless defined $pages{$_};
 882             $pages{$_} .= "$dir/$_.pm:";
 883             push(@pods, "$dir/$_.pm");
 884         }
 885     }
 886     closedir(DIR);
 887
 888     # recurse on the subdirectories if necessary
 889     if ($recurse) {
 890         foreach my $subdir (@subdirs) {
 891             scan_dir("$dir/$subdir", $recurse);
 892         }
 893     }
 894 }
 895
 896 #
 897 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
 898 #  build an index.
 899 #
 900 sub scan_headings {
 901     my($sections, @data) = @_;
 902     my($tag, $which_head, $otitle, $listdepth, $index);
 903
 904     # here we need      local $ignore = 0;
 905     #  unfortunately, we can't have it, because $ignore is lexical
 906     $ignore = 0;
 907
 908     $listdepth = 0;
 909     $index = "";
 910
 911     # scan for =head directives, note their name, and build an index
 912     #  pointing to each of them.
 913     foreach my $line (@data) {
 914         if ($line =~ /^=(head)([1-6])\s+(.*)/) {
 915             ($tag, $which_head, $otitle) = ($1,$2,$3);
 916
 917             my $title = depod( $otitle );
 918             my $name = htmlify( $title );
 919             $$sections{$name} = 1;
 920             $title = process_text( \$otitle );
 921
 922             while ($which_head != $listdepth) {
 923                 if ($which_head > $listdepth) {
 924                     $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
 925                     $listdepth++;
 926                 } elsif ($which_head < $listdepth) {
 927                     $listdepth--;
 928                     $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 929                 }
 930             }
 931
 932             $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
 933                       "<A HREF=\"#" . $name . "\">" .
 934                       $title . "</A></LI>";
 935         }
 936     }
 937
 938     # finish off the lists
 939     while ($listdepth--) {
 940         $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 941     }
 942
 943     # get rid of bogus lists
 944     $index =~ s,\t*<UL>\s*</UL>\n,,g;
 945
 946     $ignore = 1;        # restore old value;
 947
 948     return $index;
 949 }
 950
 951 #
 952 # scan_items - scans the pod specified by $pod for =item directives.  we
 953 #  will use this information later on in resolving C<> links.
 954 #
 955 sub scan_items {
 956     my( $itemref, $pod, @poddata ) = @_;
 957     my($i, $item);
 958     local $_;
 959
 960     $pod =~ s/\.pod$//;
 961     $pod .= ".html" if $pod;
 962
 963     foreach $i (0..$#poddata) {
 964         my $txt = depod( $poddata[$i] );
 965
 966         # figure out what kind of item it is.
 967         # Build string for referencing this item.
 968         if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
 969             next unless $1;
 970             $item = $1;
 971         } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
 972             $item = $1;
 973         } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
 974             $item = $1;
 975         } else {
 976             next;
 977         }
 978         my $fid = fragment_id( $item );
 979         $$itemref{$fid} = "$pod" if $fid;
 980     }
 981 }
 982
 983 #
 984 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
 985 #
 986 sub process_head {
 987     my($tag, $heading, $hasindex) = @_;
 988
 989     # figure out the level of the =head
 990     $tag =~ /head([1-6])/;
 991     my $level = $1;
 992
 993     if( $listlevel ){
 994         warn "$0: $podfile: unterminated list at =head in paragraph $paragraph.  ignoring.\n";
 995         while( $listlevel ){
 996             process_back();
 997         }
 998     }
 999
1000     print HTML "<P>\n";
1001     if( $level == 1 && ! $top ){
1002         print HTML "<A HREF=\"#__index__\"><SMALL>page index</SMALL></A>\n"
1003             if $hasindex;
1004         print HTML "<HR>\n"
1005     }
1006
1007     my $name = htmlify( depod( $heading ) );
1008     my $convert = process_text( \$heading );
1009     print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
1010 }
1011
1012
1013 #
1014 # emit_item_tag - print an =item's text
1015 # Note: The global $EmittedItem is used for inhibiting self-references.
1016 #
1017 my $EmittedItem;
1018
1019 sub emit_item_tag($$$){
1020     my( $otext, $text, $compact ) = @_;
1021     my $item = fragment_id( $text );
1022
1023     $EmittedItem = $item;
1024     ### print STDERR "emit_item_tag=$item ($text)\n";
1025
1026     print HTML '<STRONG>';
1027     if ($items_named{$item}++) {
1028         print HTML process_text( \$otext );
1029     } else {
1030         my $name = 'item_' . $item;
1031         print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';
1032     }
1033     print HTML "</STRONG><BR>\n";
1034     undef( $EmittedItem );
1035 }
1036
1037 sub emit_li {
1038     my( $tag ) = @_;
1039     if( $items_seen[$listlevel]++ == 0 ){
1040         push( @listend, "</$tag>" );
1041         print HTML "<$tag>\n";
1042     }
1043     print HTML $tag eq 'DL' ? '<DT>' : '<LI>';
1044 }
1045
1046 #
1047 # process_item - convert a pod item tag and convert it to HTML format.
1048 #
1049 sub process_item {
1050     my( $otext ) = @_;
1051
1052     # lots of documents start a list without doing an =over.  this is
1053     # bad!  but, the proper thing to do seems to be to just assume
1054     # they did do an =over.  so warn them once and then continue.
1055     if( $listlevel == 0 ){
1056         warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n";
1057         process_over();
1058     }
1059
1060     # formatting: insert a paragraph if preceding item has >1 paragraph
1061     if( $after_lpar ){
1062         print HTML "<P></P>\n";
1063         $after_lpar = 0;
1064     }
1065
1066     # remove formatting instructions from the text
1067     my $text = depod( $otext );
1068
1069     # all the list variants:
1070     if( $text =~ /\A\*/ ){ # bullet
1071         emit_li( 'UL' );
1072         if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1073             my $tag = $1;
1074             $otext =~ s/\A\*\s+//;
1075             emit_item_tag( $otext, $tag, 1 );
1076         }
1077
1078     } elsif( $text =~ /\A\d+/ ){ # numbered list
1079         emit_li( 'OL' );
1080         if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1081             my $tag = $1;
1082             $otext =~ s/\A\d+\.?\s*//;
1083             emit_item_tag( $otext, $tag, 1 );
1084         }
1085
1086     } else {                    # definition list
1087         emit_li( 'DL' );
1088         if ($text =~ /\A(.+)\Z/s ){ # should have text
1089             emit_item_tag( $otext, $text, 1 );
1090         }
1091        print HTML '<DD>';
1092     }
1093     print HTML "\n";
1094 }
1095
1096 #
1097 # process_over - process a pod over tag and start a corresponding HTML list.
1098 #
1099 sub process_over {
1100     # start a new list
1101     $listlevel++;
1102     push( @items_seen, 0 );
1103     $after_lpar = 0;
1104 }
1105
1106 #
1107 # process_back - process a pod back tag and convert it to HTML format.
1108 #
1109 sub process_back {
1110     if( $listlevel == 0 ){
1111         warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n";
1112         return;
1113     }
1114
1115     # close off the list.  note, I check to see if $listend[$listlevel] is
1116     # defined because an =item directive may have never appeared and thus
1117     # $listend[$listlevel] may have never been initialized.
1118     $listlevel--;
1119     if( defined $listend[$listlevel] ){
1120         print HTML '<P></P>' if $after_lpar;
1121         print HTML $listend[$listlevel];
1122         print HTML "\n";
1123         pop( @listend );
1124     }
1125     $after_lpar = 0;
1126
1127     # clean up item count
1128     pop( @items_seen );
1129 }
1130
1131 #
1132 # process_cut - process a pod cut tag, thus start ignoring pod directives.
1133 #
1134 sub process_cut {
1135     $ignore = 1;
1136 }
1137
1138 #
1139 # process_pod - process a pod pod tag, thus stop ignoring pod directives
1140 # until we see a corresponding cut.
1141 #
1142 sub process_pod {
1143     # no need to set $ignore to 0 cause the main loop did it
1144 }
1145
1146 #
1147 # process_for - process a =for pod tag.  if it's for html, spit
1148 # it out verbatim, if illustration, center it, otherwise ignore it.
1149 #
1150 sub process_for {
1151     my($whom, $text) = @_;
1152     if ( $whom =~ /^(pod2)?html$/i) {
1153         print HTML $text;
1154     } elsif ($whom =~ /^illustration$/i) {
1155         1 while chomp $text;
1156         for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1157           $text .= $ext, last if -r "$text$ext";
1158         }
1159         print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1160     }
1161 }
1162
1163 #
1164 # process_begin - process a =begin pod tag.  this pushes
1165 # whom we're beginning on the begin stack.  if there's a
1166 # begin stack, we only print if it us.
1167 #
1168 sub process_begin {
1169     my($whom, $text) = @_;
1170     $whom = lc($whom);
1171     push (@begin_stack, $whom);
1172     if ( $whom =~ /^(pod2)?html$/) {
1173         print HTML $text if $text;
1174     }
1175 }
1176
1177 #
1178 # process_end - process a =end pod tag.  pop the
1179 # begin stack.  die if we're mismatched.
1180 #
1181 sub process_end {
1182     my($whom, $text) = @_;
1183     $whom = lc($whom);
1184     if ($begin_stack[-1] ne $whom ) {
1185         die "Unmatched begin/end at chunk $paragraph\n"
1186     }
1187     pop( @begin_stack );
1188 }
1189
1190 #
1191 # process_pre - indented paragraph, made into <PRE></PRE>
1192 #
1193 sub process_pre {
1194     my( $text ) = @_;
1195     my( $rest );
1196     return if $ignore;
1197
1198     $rest = $$text;
1199
1200     # insert spaces in place of tabs
1201     $rest =~ s#.*#
1202             my $line = $&;
1203             1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1204             $line;
1205         #eg;
1206
1207     # convert some special chars to HTML escapes
1208     $rest =~ s/&/&amp;/g;
1209     $rest =~ s/</&lt;/g;
1210     $rest =~ s/>/&gt;/g;
1211     $rest =~ s/"/&quot;/g;
1212
1213     # try and create links for all occurrences of perl.* within
1214     # the preformatted text.
1215     $rest =~ s{
1216                  (\s*)(perl\w+)
1217               }{
1218                  if ( defined $pages{$2} ){     # is a link
1219                      qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1220                  } elsif (defined $pages{dosify($2)}) { # is a link
1221                      qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1222                  } else {
1223                      "$1$2";
1224                  }
1225               }xeg;
1226      $rest =~ s{
1227                  (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1228                }{
1229                   my $url ;
1230                   if ( $htmlfileurl ne '' ){
1231                      # Here, we take advantage of the knowledge
1232                      # that $htmlfileurl ne '' implies $htmlroot eq ''.
1233                      # Since $htmlroot eq '', we need to prepend $htmldir
1234                      # on the fron of the link to get the absolute path
1235                      # of the link's target. We check for a leading '/'
1236                      # to avoid corrupting links that are #, file:, etc.
1237                      my $old_url = $3 ;
1238                      $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1239                      $url = relativize_url( "$old_url.html", $htmlfileurl );
1240                   } else {
1241                      $url = "$3.html" ;
1242                   }
1243                   "$1$url" ;
1244                }xeg;
1245
1246     # Look for embedded URLs and make them into links.  We don't
1247     # relativize them since they are best left as the author intended.
1248
1249     my $urls = '(' . join ('|', qw{
1250                 http
1251                 telnet
1252                 mailto
1253                 news
1254                 gopher
1255                 file
1256                 wais
1257                 ftp
1258             } )
1259         . ')';
1260
1261     my $ltrs = '\w';
1262     my $gunk = '/#~:.?+=&%@!\-';
1263     my $punc = '.:?\-';
1264     my $any  = "${ltrs}${gunk}${punc}";
1265
1266     $rest =~ s{
1267         \b                          # start at word boundary
1268         (                           # begin $1  {
1269           $urls     :               # need resource and a colon
1270           (?!:)                     # Ignore File::, among others.
1271           [$any] +?                 # followed by on or more
1272                                     #  of any valid character, but
1273                                     #  be conservative and take only
1274                                     #  what you need to....
1275         )                           # end   $1  }
1276         (?=                         # look-ahead non-consumptive assertion
1277                 [$punc]*            # either 0 or more puntuation
1278                 [^$any]             #   followed by a non-url char
1279             |                       # or else
1280                 $                   #   then end of the string
1281         )
1282       }{<A HREF="$1">$1</A>}igox;
1283
1284     # text should be as it is (verbatim)
1285     $$text = $rest;
1286 }
1287
1288
1289 #
1290 # pure text processing
1291 #
1292 # pure_text/inIS_text: differ with respect to automatic C<> recognition.
1293 # we don't want this to happen within IS
1294 #
1295 sub pure_text($){
1296     my $text = shift();
1297     process_puretext( $text, \$ptQuote, 1 );
1298 }
1299
1300 sub inIS_text($){
1301     my $text = shift();
1302     process_puretext( $text, \$ptQuote, 0 );
1303 }
1304
1305 #
1306 # process_puretext - process pure text (without pod-escapes) converting
1307 #  double-quotes and handling implicit C<> links.
1308 #
1309 sub process_puretext {
1310     my($text, $quote, $notinIS) = @_;
1311
1312     ## Guessing at func() or [$@%&]*var references in plain text is destined
1313     ## to produce some strange looking ref's. uncomment to disable:
1314     ## $notinIS = 0;
1315
1316     my(@words, $lead, $trail);
1317
1318     # convert double-quotes to single-quotes
1319     if( $$quote && $text =~ s/"/''/s ){
1320         $$quote = 0;
1321     }
1322     while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1323     $$quote = 1 if $text =~ s/"/``/s;
1324
1325     # keep track of leading and trailing white-space
1326     $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");
1327     $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
1328
1329     # split at space/non-space boundaries
1330     @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
1331
1332     # process each word individually
1333     foreach my $word (@words) {
1334         # skip space runs
1335         next if $word =~ /^\s*$/;
1336         # see if we can infer a link
1337         if( $notinIS && $word =~ s/^(\w+)\((.*)\)\W*$/$1/ ) {
1338             # has parenthesis so should have been a C<> ref
1339             ## try for a pagename (perlXXX(1))?
1340             if( $2 =~ /^\d+$/ ){
1341                 my $url = page_sect( $word, '' );
1342                 if( defined $url ){
1343                     $word = "<A HREF=\"$url\">the $word manpage</A>";
1344                     next;
1345                 }
1346             }
1347             $word = emit_C( $word );
1348
1349 #### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1350 ##      } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1351 ##          # perl variables, should be a C<> ref
1352 ##          $word = emit_C( $word );
1353
1354         } elsif ($word =~ m,^\w+://\w,) {
1355             # looks like a URL
1356             # Don't relativize it: leave it as the author intended
1357             $word = qq(<A HREF="$word">$word</A>);
1358         } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1359             # looks like an e-mail address
1360             my ($w1, $w2, $w3) = ("", $word, "");
1361             ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1362             ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1363             $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1364         } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) {  # all uppercase?
1365             $word = html_escape($word) if $word =~ /["&<>]/;
1366             $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1367         } else {
1368             $word = html_escape($word) if $word =~ /["&<>]/;
1369         }
1370     }
1371
1372     # put everything back together
1373     return $lead . join( '', @words ) . $trail;
1374 }
1375
1376
1377 #
1378 # process_text - handles plaintext that appears in the input pod file.
1379 # there may be pod commands embedded within the text so those must be
1380 # converted to html commands.
1381 #
1382
1383 sub process_text1($$;$);
1384
1385 sub process_text {
1386     return if $ignore;
1387     my( $tref ) = @_;
1388     my $res = process_text1( 0, $tref );
1389     $$tref = $res;
1390 }
1391
1392 sub process_text1($$;$){
1393     my( $lev, $rstr, $func ) = @_;
1394     $lev++ unless defined $func;
1395     my $res = '';
1396
1397     $func ||= '';
1398     if( $func eq 'B' ){
1399         # B<text> - boldface
1400         $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
1401
1402     } elsif( $func eq 'C' ){
1403         # C<code> - can be a ref or <CODE></CODE>
1404         # need to extract text
1405         my $par = go_ahead( $rstr, 'C' );
1406
1407         ## clean-up of the link target
1408         my $text = depod( $par );
1409
1410         ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1411         ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1412
1413         $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1414
1415     } elsif( $func eq 'E' ){
1416         # E<x> - convert to character
1417         $$rstr =~ s/^(\w+)>//;
1418         $res = "&$1;";
1419
1420     } elsif( $func eq 'F' ){
1421         # F<filename> - italizice
1422         $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1423
1424     } elsif( $func eq 'I' ){
1425         # I<text> - italizice
1426         $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1427
1428     } elsif( $func eq 'L' ){
1429         # L<link> - link
1430         ## L<text|cross-ref> => produce text, use cross-ref for linking
1431         ## L<cross-ref> => make text from cross-ref
1432         ## need to extract text
1433         my $par = go_ahead( $rstr, 'L' );
1434
1435         # some L<>'s that shouldn't be:
1436         # a) full-blown URL's are emitted as-is
1437         if( $par =~ m{^\w+://}s ){
1438             return make_URL_href( $par );
1439         }
1440         # b) C<...> is stripped and treated as C<>
1441         if( $par =~ /^C<(.*)>$/ ){
1442             my $text = depod( $1 );
1443             return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1444         }
1445
1446         # analyze the contents
1447         $par =~ s/\n/ /g;   # undo word-wrapped tags
1448         my $opar = $par;
1449         my $linktext;
1450         if( $par =~ s{^([^|]+)\|}{} ){
1451             $linktext = $1;
1452         }
1453
1454         # make sure sections start with a /
1455         $par =~ s{^"}{/"};
1456
1457         my( $page, $section, $ident );
1458
1459         # check for link patterns
1460         if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){     # name/ident
1461             # we've got a name/ident (no quotes)
1462             ( $page, $ident ) = ( $1, $2 );
1463             ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1464
1465         } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1466             # even though this should be a "section", we go for ident first
1467             ( $page, $ident ) = ( $1, $2 );
1468             ### print STDERR "--> L<$par> to page $page, section $section\n";
1469
1470         } elsif( $par =~ /\s/ ){  # this must be a section with missing quotes
1471             ( $page, $section ) = ( '', $par );
1472             ### print STDERR "--> L<$par> to void page, section $section\n";
1473
1474         } else {
1475             ( $page, $section ) = ( $par, '' );
1476             ### print STDERR "--> L<$par> to page $par, void section\n";
1477         }
1478
1479         # now, either $section or $ident is defined. the convoluted logic
1480         # below tries to resolve L<> according to what the user specified.
1481         # failing this, we try to find the next best thing...
1482         my( $url, $ltext, $fid );
1483
1484         RESOLVE: {
1485             if( defined $ident ){
1486                 ## try to resolve $ident as an item
1487                 ( $url, $fid ) = coderef( $page, $ident );
1488                 if( $url ){
1489                     if( ! defined( $linktext ) ){
1490                         $linktext = $ident;
1491                         $linktext .= " in " if $ident && $page;
1492                         $linktext .= "the $page manpage" if $page;
1493                     }
1494                     ###  print STDERR "got coderef url=$url\n";
1495                     last RESOLVE;
1496                 }
1497                 ## no luck: go for a section (auto-quoting!)
1498                 $section = $ident;
1499             }
1500             ## now go for a section
1501             my $htmlsection = htmlify( $section );
1502             $url = page_sect( $page, $htmlsection );
1503             if( $url ){
1504                 if( ! defined( $linktext ) ){
1505                     $linktext = $section;
1506                     $linktext .= " in " if $section && $page;
1507                     $linktext .= "the $page manpage" if $page;
1508                 }
1509                 ### print STDERR "got page/section url=$url\n";
1510                 last RESOLVE;
1511             }
1512             ## no luck: go for an ident
1513             if( $section ){
1514                 $ident = $section;
1515             } else {
1516                 $ident = $page;
1517                 $page  = undef();
1518             }
1519             ( $url, $fid ) = coderef( $page, $ident );
1520             if( $url ){
1521                 if( ! defined( $linktext ) ){
1522                     $linktext = $ident;
1523                     $linktext .= " in " if $ident && $page;
1524                     $linktext .= "the $page manpage" if $page;
1525                 }
1526                 ### print STDERR "got section=>coderef url=$url\n";
1527                 last RESOLVE;
1528             }
1529
1530             # warning; show some text.
1531             $linktext = $opar unless defined $linktext;
1532             warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1533         }
1534
1535         # now we have an URL or just plain code
1536         $$rstr = $linktext . '>' . $$rstr;
1537         if( defined( $url ) ){
1538             $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
1539         } else {
1540             $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1541         }
1542
1543     } elsif( $func eq 'S' ){
1544         # S<text> - non-breaking spaces
1545         $res = process_text1( $lev, $rstr );
1546         $res =~ s/ /&nbsp;/g;
1547
1548     } elsif( $func eq 'X' ){
1549         # X<> - ignore
1550         $$rstr =~ s/^[^>]*>//;
1551
1552     } elsif( $func eq 'Z' ){
1553         # Z<> - empty
1554         warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1555             unless $$rstr =~ s/^>//;
1556
1557     } else {
1558         while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
1559             # all others: either recurse into new function or
1560             # terminate at closing angle bracket
1561             my $pt = $1;
1562             $pt .= '>' if $2 eq '>' &&  $lev == 1;
1563             $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1564             return $res if $2 eq '>' && $lev > 1;
1565             if( $2 ne '>' ){
1566                 $res .= process_text1( $lev, $rstr, substr($2,0,1) );
1567             }
1568
1569         }
1570         if( $lev == 1 ){
1571             $res .= pure_text( $$rstr );
1572         } else {
1573             warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1574         }
1575     }
1576     return $res;
1577 }
1578
1579 #
1580 # go_ahead: extract text of an IS (can be nested)
1581 #
1582 sub go_ahead($$){
1583     my( $rstr, $func ) = @_;
1584     my $res = '';
1585     my $level = 1;
1586     while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
1587         $res .= $1;
1588         if( $2 eq '>' ){
1589             return $res if --$level == 0;
1590         } else {
1591             ++$level;
1592         }
1593         $res .= $2;
1594     }
1595     warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1596     return $res;
1597 }
1598
1599 #
1600 # emit_C - output result of C<text>
1601 #    $text is the depod-ed text
1602 #
1603 sub emit_C($;$){
1604     my( $text, $nocode ) = @_;
1605     my $res;
1606     my( $url, $fid ) = coderef( undef(), $text );
1607
1608     # need HTML-safe text
1609     my $linktext = html_escape( $text );
1610
1611     if( defined( $url ) &&
1612         (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1613         $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>";
1614     } elsif( 0 && $nocode ){
1615         $res = $linktext;
1616     } else {
1617         $res = "<CODE>$linktext</CODE>";
1618     }
1619     return $res;
1620 }
1621
1622 #
1623 # html_escape: make text safe for HTML
1624 #
1625 sub html_escape {
1626     my $rest = $_[0];
1627     $rest   =~ s/&/&amp;/g;
1628     $rest   =~ s/</&lt;/g;
1629     $rest   =~ s/>/&gt;/g;
1630     $rest   =~ s/"/&quot;/g;
1631     return $rest;
1632 }
1633
1634
1635 #
1636 # dosify - convert filenames to 8.3
1637 #
1638 sub dosify {
1639     my($str) = @_;
1640     return lc($str) if $^O eq 'VMS';     # VMS just needs casing
1641     if ($Is83) {
1642         $str = lc $str;
1643         $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1644         $str =~ s/(\w+)/substr ($1,0,8)/ge;
1645     }
1646     return $str;
1647 }
1648
1649 #
1650 # page_sect - make an URL from the text of a L<>
1651 #
1652 sub page_sect($$) {
1653     my( $page, $section ) = @_;
1654     my( $linktext, $page83, $link);     # work strings
1655
1656     # check if we know that this is a section in this page
1657     if (!defined $pages{$page} && defined $sections{$page}) {
1658         $section = $page;
1659         $page = "";
1660         ### print STDERR "reset page='', section=$section\n";
1661     }
1662
1663     $page83=dosify($page);
1664     $page=$page83 if (defined $pages{$page83});
1665     if ($page eq "") {
1666         $link = "#" . htmlify( $section );
1667     } elsif ( $page =~ /::/ ) {
1668         $page =~ s,::,/,g;
1669         # Search page cache for an entry keyed under the html page name,
1670         # then look to see what directory that page might be in.  NOTE:
1671         # this will only find one page. A better solution might be to produce
1672         # an intermediate page that is an index to all such pages.
1673         my $page_name = $page ;
1674         $page_name =~ s,^.*/,, ;
1675         if ( defined( $pages{ $page_name } ) &&
1676              $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1677            ) {
1678             $page = $1 ;
1679         }
1680         else {
1681             # NOTE: This branch assumes that all A::B pages are located in
1682             # $htmlroot/A/B.html . This is often incorrect, since they are
1683             # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1684             # analyze the contents of %pages and figure out where any
1685             # cousins of A::B are, then assume that.  So, if A::B isn't found,
1686             # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1687             # lib/A/B.pm. This is also limited, but it's an improvement.
1688             # Maybe a hints file so that the links point to the correct places
1689             # nonetheless?
1690
1691         }
1692         $link = "$htmlroot/$page.html";
1693         $link .= "#" . htmlify( $section ) if ($section);
1694     } elsif (!defined $pages{$page}) {
1695         $link = "";
1696     } else {
1697         $section = htmlify( $section ) if $section ne "";
1698         ### print STDERR "...section=$section\n";
1699
1700         # if there is a directory by the name of the page, then assume that an
1701         # appropriate section will exist in the subdirectory
1702 #       if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1703         if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
1704             $link = "$htmlroot/$1/$section.html";
1705             ### print STDERR "...link=$link\n";
1706
1707         # since there is no directory by the name of the page, the section will
1708         # have to exist within a .html of the same name.  thus, make sure there
1709         # is a .pod or .pm that might become that .html
1710         } else {
1711             $section = "#$section" if $section;
1712             ### print STDERR "...section=$section\n";
1713
1714             # check if there is a .pod with the page name
1715             if ($pages{$page} =~ /([^:]*)\.pod:/) {
1716                 $link = "$htmlroot/$1.html$section";
1717             } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1718                 $link = "$htmlroot/$1.html$section";
1719             } else {
1720                 $link = "";
1721             }
1722         }
1723     }
1724
1725     if ($link) {
1726         # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1727         # implies $htmlroot eq ''. This means that the link in question
1728         # needs a prefix of $htmldir if it begins with '/'. The test for
1729         # the initial '/' is done to avoid '#'-only links, and to allow
1730         # for other kinds of links, like file:, ftp:, etc.
1731         my $url ;
1732         if (  $htmlfileurl ne '' ) {
1733             $link = "$htmldir$link" if $link =~ m{^/};
1734             $url = relativize_url( $link, $htmlfileurl );
1735 # print( "  b: [$link,$htmlfileurl,$url]\n" );
1736         }
1737         else {
1738             $url = $link ;
1739         }
1740         return $url;
1741
1742     } else {
1743         return undef();
1744     }
1745 }
1746
1747 #
1748 # relativize_url - convert an absolute URL to one relative to a base URL.
1749 # Assumes both end in a filename.
1750 #
1751 sub relativize_url {
1752     my ($dest,$source) = @_ ;
1753
1754     my ($dest_volume,$dest_directory,$dest_file) =
1755         File::Spec::Unix->splitpath( $dest ) ;
1756     $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1757
1758     my ($source_volume,$source_directory,$source_file) =
1759         File::Spec::Unix->splitpath( $source ) ;
1760     $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1761
1762     my $rel_path = '' ;
1763     if ( $dest ne '' ) {
1764        $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1765     }
1766
1767     if ( $rel_path ne ''                &&
1768          substr( $rel_path, -1 ) ne '/' &&
1769          substr( $dest_file, 0, 1 ) ne '#'
1770         ) {
1771         $rel_path .= "/$dest_file" ;
1772     }
1773     else {
1774         $rel_path .= "$dest_file" ;
1775     }
1776
1777     return $rel_path ;
1778 }
1779
1780
1781 #
1782 # coderef - make URL from the text of a C<>
1783 #
1784 sub coderef($$){
1785     my( $page, $item ) = @_;
1786     my( $url );
1787
1788     my $fid = fragment_id( $item );
1789     return( $url, $fid );
1790     if( defined( $page ) ){
1791         # we have been given a $page...
1792         $page =~ s{::}{/}g;
1793
1794         # Do we take it? Item could be a section!
1795         my $base = $items{$fid} || "";
1796         $base =~ s{[^/]*/}{};
1797         if( $base ne "$page.html" ){
1798             ###   print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1799             $page = undef();
1800         }
1801
1802     } else {
1803         # no page - local items precede cached items
1804         if(  exists $local_items{$fid} ){
1805             $page = $local_items{$fid};
1806         } else {
1807             $page = $items{$fid};
1808         }
1809     }
1810
1811     # if there was a pod file that we found earlier with an appropriate
1812     # =item directive, then create a link to that page.
1813     if( defined $page ){
1814         if( $page ){
1815             if( exists $pages{$page} and $pages{$page} =~ /([^:.]*)\.[^:]*:/){
1816                 $page = $1 . '.html';
1817             }
1818             my $link = "$htmlroot/$page#item_$fid";
1819
1820             # Here, we take advantage of the knowledge that $htmlfileurl
1821             # ne '' implies $htmlroot eq ''.
1822             if (  $htmlfileurl ne '' ) {
1823                 $link = "$htmldir$link" ;
1824                 $url = relativize_url( $link, $htmlfileurl ) ;
1825             } else {
1826                 $url = $link ;
1827             }
1828         } else {
1829             $url = "#item_" . $fid;
1830         }
1831
1832         confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1833     }
1834     return( $url, $fid );
1835 }
1836
1837
1838
1839 #
1840 # Adapted from Nick Ing-Simmons' PodToHtml package.
1841 sub relative_url {
1842     my $source_file = shift ;
1843     my $destination_file = shift;
1844
1845     my $source = URI::file->new_abs($source_file);
1846     my $uo = URI::file->new($destination_file,$source)->abs;
1847     return $uo->rel->as_string;
1848 }
1849
1850
1851 #
1852 # finish_list - finish off any pending HTML lists.  this should be called
1853 # after the entire pod file has been read and converted.
1854 #
1855 sub finish_list {
1856     while ($listlevel > 0) {
1857         print HTML "</DL>\n";
1858         $listlevel--;
1859     }
1860 }
1861
1862 #
1863 # htmlify - converts a pod section specification to a suitable section
1864 # specification for HTML. Note that we keep spaces and special characters
1865 # except ", ? (Netscape problem) and the hyphen (writer's problem...).
1866 #
1867 sub htmlify {
1868     my( $heading) = @_;
1869     $heading =~ s/(\s+)/ /g;
1870     $heading =~ s/\s+\Z//;
1871     $heading =~ s/\A\s+//;
1872     # The hyphen is a disgrace to the English language.
1873     $heading =~ s/[-"?]//g;
1874     $heading = lc( $heading );
1875     return $heading;
1876 }
1877
1878 #
1879 # depod - convert text by eliminating all interior sequences
1880 # Note: can be called with copy or modify semantics
1881 #
1882 my %E2c;
1883 $E2c{lt} = '<';
1884 $E2c{gt} = '>';
1885 $E2c{sol} = '/';
1886 $E2c{verbar} = '|';
1887
1888 sub depod1($;$);
1889
1890 sub depod($){
1891     my $string;
1892     if( ref( $_[0] ) ){
1893         $string =  ${$_[0]};
1894         ${$_[0]} = depod1( \$string );
1895     } else {
1896         $string =  $_[0];
1897         depod1( \$string );
1898     }
1899 }
1900
1901 sub depod1($;$){
1902   my( $rstr, $func ) = @_;
1903   my $res = '';
1904   return $res unless defined $$rstr;
1905   if( ! defined( $func ) ){
1906       # skip to next begin of an interior sequence
1907       while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<// ){
1908          # recurse into its text
1909          $res .= $1 . depod1( $rstr, $2 );
1910       }
1911       $res .= $$rstr;
1912   } elsif( $func eq 'E' ){
1913       # E<x> - convert to character
1914       $$rstr =~ s/^(\w+)>//;
1915       $res .= $E2c{$1} || "";
1916   } elsif( $func eq 'X' ){
1917       # X<> - ignore
1918       $$rstr =~ s/^[^>]*>//;
1919   } elsif( $func eq 'Z' ){
1920       # Z<> - empty
1921       $$rstr =~ s/^>//;
1922   } else {
1923       # all others: either recurse into new function or
1924       # terminate at closing angle bracket
1925       while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)// ){
1926           $res .= $1;
1927           last if $2 eq '>';
1928           $res .= depod1( $rstr, substr($2,0,1) );
1929       }
1930       ## If we're here and $2 ne '>': undelimited interior sequence.
1931       ## Ignored, as this is called without proper indication of where we are.
1932       ## Rely on process_text to produce diagnostics.
1933   }
1934   return $res;
1935 }
1936
1937 #
1938 # fragment_id - construct a fragment identifier from:
1939 #   a) =item text
1940 #   b) contents of C<...>
1941 #
1942 my @hc;
1943 sub fragment_id {
1944     my $text = shift();
1945     $text =~ s/\s+\Z//s;
1946     if( $text ){
1947         # a method or function?
1948         return $1 if $text =~ /(\w+)\s*\(/;
1949         return $1 if $text =~ /->\s*(\w+)\s*\(?/;
1950
1951         # a variable name?
1952         return $1 if $text =~ /^([$@%*]\S+)/;
1953
1954         # some pattern matching operator?
1955         return $1 if $text =~ m|^(\w+/).*/\w*$|;
1956
1957         # fancy stuff... like "do { }"
1958         return $1 if $text =~ m|^(\w+)\s*{.*}$|;
1959
1960         # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
1961         # and some funnies with ... Module ...
1962         return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
1963         return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
1964
1965         # text? normalize!
1966         $text =~ s/\s+/_/sg;
1967         $text =~ s{(\W)}{
1968          defined( $hc[ord($1)] ) ? $hc[ord($1)]
1969                  : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
1970         $text = substr( $text, 0, 50 );
1971     } else {
1972         return undef();
1973     }
1974 }
1975
1976 #
1977 # make_URL_href - generate HTML href from URL
1978 # Special treatment for CGI queries.
1979 #
1980 sub make_URL_href($){
1981     my( $url ) = @_;
1982     if( $url !~
1983         s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<A HREF="$1$2">$1</A>}i ){
1984         $url = "<A HREF=\"$url\">$url</A>";
1985     }
1986     return $url;
1987 }
1988
1989 1;