lib/Pod/Html.pm

   1 package Pod::Html;
   2
   3 use Pod::Functions;
   4 use Getopt::Long;       # package for handling command-line parameters
   5 use File::Spec::Unix;
   6 require Exporter;
   7 use vars qw($VERSION);
   8 $VERSION = 1.03;
   9 @ISA = Exporter;
  10 @EXPORT = qw(pod2html htmlify);
  11 use Cwd;
  12
  13 use Carp;
  14
  15 use locale;     # make \w work right in non-ASCII lands
  16
  17 use strict;
  18
  19 use Config;
  20
  21 =head1 NAME
  22
  23 Pod::Html - module to convert pod files to HTML
  24
  25 =head1 SYNOPSIS
  26
  27     use Pod::Html;
  28     pod2html([options]);
  29
  30 =head1 DESCRIPTION
  31
  32 Converts files from pod format (see L<perlpod>) to HTML format.  It
  33 can automatically generate indexes and cross-references, and it keeps
  34 a cache of things it knows how to cross-reference.
  35
  36 =head1 ARGUMENTS
  37
  38 Pod::Html takes the following arguments:
  39
  40 =over 4
  41
  42 =item help
  43
  44     --help
  45
  46 Displays the usage message.
  47
  48 =item htmldir
  49
  50     --htmldir=name
  51
  52 Sets the directory in which the resulting HTML file is placed.  This
  53 is used to generate relative links to other files. Not passing this
  54 causes all links to be absolute, since this is the value that tells
  55 Pod::Html the root of the documentation tree.
  56
  57 =item htmlroot
  58
  59     --htmlroot=name
  60
  61 Sets the base URL for the HTML files.  When cross-references are made,
  62 the HTML root is prepended to the URL.
  63
  64 =item infile
  65
  66     --infile=name
  67
  68 Specify the pod file to convert.  Input is taken from STDIN if no
  69 infile is specified.
  70
  71 =item outfile
  72
  73     --outfile=name
  74
  75 Specify the HTML file to create.  Output goes to STDOUT if no outfile
  76 is specified.
  77
  78 =item podroot
  79
  80     --podroot=name
  81
  82 Specify the base directory for finding library pods.
  83
  84 =item podpath
  85
  86     --podpath=name:...:name
  87
  88 Specify which subdirectories of the podroot contain pod files whose
  89 HTML converted forms can be linked-to in cross-references.
  90
  91 =item libpods
  92
  93     --libpods=name:...:name
  94
  95 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
  96
  97 =item netscape
  98
  99     --netscape
 100
 101 Use Netscape HTML directives when applicable.
 102
 103 =item nonetscape
 104
 105     --nonetscape
 106
 107 Do not use Netscape HTML directives (default).
 108
 109 =item index
 110
 111     --index
 112
 113 Generate an index at the top of the HTML file (default behaviour).
 114
 115 =item noindex
 116
 117     --noindex
 118
 119 Do not generate an index at the top of the HTML file.
 120
 121
 122 =item recurse
 123
 124     --recurse
 125
 126 Recurse into subdirectories specified in podpath (default behaviour).
 127
 128 =item norecurse
 129
 130     --norecurse
 131
 132 Do not recurse into subdirectories specified in podpath.
 133
 134 =item title
 135
 136     --title=title
 137
 138 Specify the title of the resulting HTML file.
 139
 140 =item css
 141
 142     --css=stylesheet
 143
 144 Specify the URL of a cascading style sheet.
 145
 146 =item verbose
 147
 148     --verbose
 149
 150 Display progress messages.
 151
 152 =item quiet
 153
 154     --quiet
 155
 156 Don't display I<mostly harmless> warning messages.
 157
 158 =back
 159
 160 =head1 EXAMPLE
 161
 162     pod2html("pod2html",
 163              "--podpath=lib:ext:pod:vms",
 164              "--podroot=/usr/src/perl",
 165              "--htmlroot=/perl/nmanual",
 166              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
 167              "--recurse",
 168              "--infile=foo.pod",
 169              "--outfile=/perl/nmanual/foo.html");
 170
 171 =head1 ENVIRONMENT
 172
 173 Uses $Config{pod2html} to setup default options.
 174
 175 =head1 AUTHOR
 176
 177 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 178
 179 =head1 SEE ALSO
 180
 181 L<perlpod>
 182
 183 =head1 COPYRIGHT
 184
 185 This program is distributed under the Artistic License.
 186
 187 =cut
 188
 189 my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
 190 my $dircache = "pod2htmd$cache_ext";
 191 my $itemcache = "pod2htmi$cache_ext";
 192
 193 my @begin_stack = ();           # begin/end stack
 194
 195 my @libpods = ();               # files to search for links from C<> directives
 196 my $htmlroot = "/";             # http-server base directory from which all
 197                                 #   relative paths in $podpath stem.
 198 my $htmldir = "";               # The directory to which the html pages
 199                                 # will (eventually) be written.
 200 my $htmlfile = "";              # write to stdout by default
 201 my $htmlfileurl = "" ;          # The url that other files would use to
 202                                 # refer to this file.  This is only used
 203                                 # to make relative urls that point to
 204                                 # other files.
 205 my $podfile = "";               # read from stdin by default
 206 my @podpath = ();               # list of directories containing library pods.
 207 my $podroot = ".";              # filesystem base directory from which all
 208                                 #   relative paths in $podpath stem.
 209 my $css = '';                   # Cascading style sheet
 210 my $recurse = 1;                # recurse on subdirectories in $podpath.
 211 my $quiet = 0;                  # not quiet by default
 212 my $verbose = 0;                # not verbose by default
 213 my $doindex = 1;                # non-zero if we should generate an index
 214 my $listlevel = 0;              # current list depth
 215 my @listend = ();               # the text to use to end the list.
 216 my $after_lpar = 0;             # set to true after a par in an =item
 217 my $ignore = 1;                 # whether or not to format text.  we don't
 218                                 #   format text until we hit our first pod
 219                                 #   directive.
 220
 221 my %items_named = ();           # for the multiples of the same item in perlfunc
 222 my @items_seen = ();
 223 my $netscape = 0;               # whether or not to use netscape directives.
 224 my $title;                      # title to give the pod(s)
 225 my $header = 0;                 # produce block header/footer
 226 my $top = 1;                    # true if we are at the top of the doc.  used
 227                                 #   to prevent the first <HR> directive.
 228 my $paragraph;                  # which paragraph we're processing (used
 229                                 #   for error messages)
 230 my $ptQuote = 0;                # status of double-quote conversion
 231 my %pages = ();                 # associative array used to find the location
 232                                 #   of pages referenced by L<> links.
 233 my %sections = ();              # sections within this page
 234 my %items = ();                 # associative array used to find the location
 235                                 #   of =item directives referenced by C<> links
 236 my %local_items = ();           # local items - avoid destruction of %items
 237 my $Is83;                       # is dos with short filenames (8.3)
 238
 239 sub init_globals {
 240 $dircache = "pod2htmd$cache_ext";
 241 $itemcache = "pod2htmi$cache_ext";
 242
 243 @begin_stack = ();              # begin/end stack
 244
 245 @libpods = ();          # files to search for links from C<> directives
 246 $htmlroot = "/";                # http-server base directory from which all
 247                                 #   relative paths in $podpath stem.
 248 $htmlfile = "";         # write to stdout by default
 249 $podfile = "";          # read from stdin by default
 250 @podpath = ();          # list of directories containing library pods.
 251 $podroot = ".";         # filesystem base directory from which all
 252                                 #   relative paths in $podpath stem.
 253 $css = '';                   # Cascading style sheet
 254 $recurse = 1;           # recurse on subdirectories in $podpath.
 255 $quiet = 0;             # not quiet by default
 256 $verbose = 0;           # not verbose by default
 257 $doindex = 1;                   # non-zero if we should generate an index
 258 $listlevel = 0;         # current list depth
 259 @listend = ();          # the text to use to end the list.
 260 $after_lpar = 0;        # set to true after a par in an =item
 261 $ignore = 1;                    # whether or not to format text.  we don't
 262                                 #   format text until we hit our first pod
 263                                 #   directive.
 264
 265 @items_seen = ();
 266 %items_named = ();
 267 $netscape = 0;          # whether or not to use netscape directives.
 268 $header = 0;                    # produce block header/footer
 269 $title = '';                    # title to give the pod(s)
 270 $top = 1;                       # true if we are at the top of the doc.  used
 271                                 #   to prevent the first <HR> directive.
 272 $paragraph = '';                        # which paragraph we're processing (used
 273                                 #   for error messages)
 274 %sections = ();         # sections within this page
 275
 276 # These are not reinitialised here but are kept as a cache.
 277 # See get_cache and related cache management code.
 278 #%pages = ();                   # associative array used to find the location
 279                                 #   of pages referenced by L<> links.
 280 #%items = ();                   # associative array used to find the location
 281                                 #   of =item directives referenced by C<> links
 282 %local_items = ();
 283 $Is83=$^O eq 'dos';
 284 }
 285
 286 #
 287 # clean_data: global clean-up of pod data
 288 #
 289 sub clean_data($){
 290     my( $dataref ) = @_;
 291     my $i;
 292     for( $i = 0; $i <= $#$dataref; $i++ ){
 293         ${$dataref}[$i] =~ s/\s+\Z//;
 294
 295         # have a look for all-space lines
 296         if( ${$dataref}[$i] =~ /^\s+$/m ){
 297             my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
 298             splice( @$dataref, $i, 1, @chunks );
 299         }
 300     }
 301 }
 302
 303
 304 sub pod2html {
 305     local(@ARGV) = @_;
 306     local($/);
 307     local $_;
 308
 309     init_globals();
 310
 311     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 312
 313     # cache of %pages and %items from last time we ran pod2html
 314
 315     #undef $opt_help if defined $opt_help;
 316
 317     # parse the command-line parameters
 318     parse_command_line();
 319
 320     # set some variables to their default values if necessary
 321     local *POD;
 322     unless (@ARGV && $ARGV[0]) {
 323         $podfile  = "-" unless $podfile;        # stdin
 324         open(POD, "<$podfile")
 325                 || die "$0: cannot open $podfile file for input: $!\n";
 326     } else {
 327         $podfile = $ARGV[0];  # XXX: might be more filenames
 328         *POD = *ARGV;
 329     }
 330     $htmlfile = "-" unless $htmlfile;   # stdout
 331     $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
 332     $htmldir =~ s#/$## ;                # so we don't get a //
 333     if (  $htmlroot eq ''
 334        && defined( $htmldir )
 335        && $htmldir ne ''
 336        && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
 337        )
 338     {
 339         # Set the 'base' url for this file, so that we can use it
 340         # as the location from which to calculate relative links
 341         # to other files. If this is '', then absolute links will
 342         # be used throughout.
 343         $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
 344     }
 345
 346     # read the pod a paragraph at a time
 347     warn "Scanning for sections in input file(s)\n" if $verbose;
 348     $/ = "";
 349     my @poddata  = <POD>;
 350     close(POD);
 351     clean_data( \@poddata );
 352
 353     # scan the pod for =head[1-6] directives and build an index
 354     my $index = scan_headings(\%sections, @poddata);
 355
 356     unless($index) {
 357         warn "No headings in $podfile\n" if $verbose;
 358     }
 359
 360     # open the output file
 361     open(HTML, ">$htmlfile")
 362             || die "$0: cannot open $htmlfile file for output: $!\n";
 363
 364     # put a title in the HTML file if one wasn't specified
 365     if ($title eq '') {
 366         TITLE_SEARCH: {
 367             for (my $i = 0; $i < @poddata; $i++) {
 368                 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 369                     for my $para ( @poddata[$i, $i+1] ) {
 370                         last TITLE_SEARCH
 371                             if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
 372                     }
 373                 }
 374
 375             }
 376         }
 377     }
 378     if (!$title and $podfile =~ /\.pod$/) {
 379         # probably a split pod so take first =head[12] as title
 380         for (my $i = 0; $i < @poddata; $i++) {
 381             last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 382         }
 383         warn "adopted '$title' as title for $podfile\n"
 384             if $verbose and $title;
 385     }
 386     if ($title) {
 387         $title =~ s/\s*\(.*\)//;
 388     } else {
 389         warn "$0: no title for $podfile" unless $quiet;
 390         $podfile =~ /^(.*)(\.[^.\/]+)?$/;
 391         $title = ($podfile eq "-" ? 'No Title' : $1);
 392         warn "using $title" if $verbose;
 393     }
 394     my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
 395     $csslink =~ s,\\,/,g;
 396     $csslink =~ s,(/.):,$1|,;
 397
 398     my $block = $header ? <<END_OF_BLOCK : '';
 399 <TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
 400 <TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
 401 <FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
 402 </TD></TR>
 403 </TABLE>
 404 END_OF_BLOCK
 405
 406     print HTML <<END_OF_HEAD;
 407 <HTML>
 408 <HEAD>
 409 <TITLE>$title</TITLE>$csslink
 410 <LINK REV="made" HREF="mailto:$Config{perladmin}">
 411 </HEAD>
 412
 413 <BODY>
 414 $block
 415 END_OF_HEAD
 416
 417     # load/reload/validate/cache %pages and %items
 418     get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
 419
 420     # scan the pod for =item directives
 421     scan_items( \%local_items, "", @poddata);
 422
 423     # put an index at the top of the file.  note, if $doindex is 0 we
 424     # still generate an index, but surround it with an html comment.
 425     # that way some other program can extract it if desired.
 426     $index =~ s/--+/-/g;
 427     print HTML "<A NAME=\"__index__\"></A>\n";
 428     print HTML "<!-- INDEX BEGIN -->\n";
 429     print HTML "<!--\n" unless $doindex;
 430     print HTML $index;
 431     print HTML "-->\n" unless $doindex;
 432     print HTML "<!-- INDEX END -->\n\n";
 433     print HTML "<HR>\n" if $doindex and $index;
 434
 435     # now convert this file
 436     my $after_item;             # set to true after an =item
 437     warn "Converting input file $podfile\n" if $verbose;
 438     foreach my $i (0..$#poddata){
 439         $ptQuote = 0; # status of quote conversion
 440
 441         $_ = $poddata[$i];
 442         $paragraph = $i+1;
 443         if (/^(=.*)/s) {        # is it a pod directive?
 444             $ignore = 0;
 445             $after_item = 0;
 446             $_ = $1;
 447             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 448                 process_begin($1, $2);
 449             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 450                 process_end($1, $2);
 451             } elsif (/^=cut/) {                 # =cut
 452                 process_cut();
 453             } elsif (/^=pod/) {                 # =pod
 454                 process_pod();
 455             } else {
 456                 next if @begin_stack && $begin_stack[-1] ne 'html';
 457
 458                 if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
 459                     process_head( $1, $2, $doindex && $index );
 460                 } elsif (/^=item\s*(.*\S)?/sm) {        # =item text
 461                     warn "$0: $podfile: =item without bullet, number or text"
 462                        . " in paragraph $paragraph.\n" if $1 eq '';
 463                     process_item( $1 );
 464                     $after_item = 1;
 465                 } elsif (/^=over\s*(.*)/) {             # =over N
 466                     process_over();
 467                 } elsif (/^=back/) {            # =back
 468                     process_back();
 469                 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
 470                     process_for($1,$2);
 471                 } else {
 472                     /^=(\S*)\s*/;
 473                     warn "$0: $podfile: unknown pod directive '$1' in "
 474                        . "paragraph $paragraph.  ignoring.\n";
 475                 }
 476             }
 477             $top = 0;
 478         }
 479         else {
 480             next if $ignore;
 481             next if @begin_stack && $begin_stack[-1] ne 'html';
 482             my $text = $_;
 483             if( $text =~ /\A\s+/ ){
 484                 process_pre( \$text );
 485                 print HTML "<PRE>\n$text</PRE>\n";
 486
 487             } else {
 488                 process_text( \$text );
 489
 490                 # experimental: check for a paragraph where all lines
 491                 # have some ...\t...\t...\n pattern
 492                 if( $text =~ /\t/ ){
 493                     my @lines = split( "\n", $text );
 494                     if( @lines > 1 ){
 495                         my $all = 2;
 496                         foreach my $line ( @lines ){
 497                             if( $line =~ /\S/ && $line !~ /\t/ ){
 498                                 $all--;
 499                                 last if $all == 0;
 500                             }
 501                         }
 502                         if( $all > 0 ){
 503                             $text =~ s/\t+/<TD>/g;
 504                             $text =~ s/^/<TR><TD>/gm;
 505                             $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
 506                                     $text . '</TABLE>';
 507                         }
 508                     }
 509                 }
 510                 ## end of experimental
 511
 512                 if( $after_item ){
 513                     print HTML "$text\n";
 514                     $after_lpar = 1;
 515                 } else {
 516                     print HTML "<P>$text</P>\n";
 517                 }
 518             }
 519             $after_item = 0;
 520         }
 521     }
 522
 523     # finish off any pending directives
 524     finish_list();
 525
 526     # link to page index
 527     print HTML "<P><A HREF=\"#__index__\"><SMALL>page index</SMALL></A></P>\n"
 528         if $doindex and $index;
 529
 530     print HTML <<END_OF_TAIL;
 531 $block
 532 </BODY>
 533
 534 </HTML>
 535 END_OF_TAIL
 536
 537     # close the html file
 538     close(HTML);
 539
 540     warn "Finished\n" if $verbose;
 541 }
 542
 543 ##############################################################################
 544
 545 my $usage;                      # see below
 546 sub usage {
 547     my $podfile = shift;
 548     warn "$0: $podfile: @_\n" if @_;
 549     die $usage;
 550 }
 551
 552 $usage =<<END_OF_USAGE;
 553 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 554            --podpath=<name>:...:<name> --podroot=<name>
 555            --libpods=<name>:...:<name> --recurse --verbose --index
 556            --netscape --norecurse --noindex
 557
 558   --flush      - flushes the item and directory caches.
 559   --help       - prints this message.
 560   --htmlroot   - http-server base directory from which all relative paths
 561                  in podpath stem (default is /).
 562   --index      - generate an index at the top of the resulting html
 563                  (default).
 564   --infile     - filename for the pod to convert (input taken from stdin
 565                  by default).
 566   --libpods    - colon-separated list of pages to search for =item pod
 567                  directives in as targets of C<> and implicit links (empty
 568                  by default).  note, these are not filenames, but rather
 569                  page names like those that appear in L<> links.
 570   --netscape   - will use netscape html directives when applicable.
 571   --nonetscape - will not use netscape directives (default).
 572   --outfile    - filename for the resulting html file (output sent to
 573                  stdout by default).
 574   --podpath    - colon-separated list of directories containing library
 575                  pods.  empty by default.
 576   --podroot    - filesystem base directory from which all relative paths
 577                  in podpath stem (default is .).
 578   --noindex    - don't generate an index at the top of the resulting html.
 579   --norecurse  - don't recurse on those subdirectories listed in podpath.
 580   --recurse    - recurse on those subdirectories listed in podpath
 581                  (default behavior).
 582   --title      - title that will appear in resulting html file.
 583   --header     - produce block header/footer
 584   --css        - stylesheet URL
 585   --verbose    - self-explanatory
 586   --quiet      - supress some benign warning messages
 587
 588 END_OF_USAGE
 589
 590 sub parse_command_line {
 591     my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
 592     unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
 593     my $result = GetOptions(
 594                             'flush'      => \$opt_flush,
 595                             'help'       => \$opt_help,
 596                             'htmldir=s'  => \$opt_htmldir,
 597                             'htmlroot=s' => \$opt_htmlroot,
 598                             'index!'     => \$opt_index,
 599                             'infile=s'   => \$opt_infile,
 600                             'libpods=s'  => \$opt_libpods,
 601                             'netscape!'  => \$opt_netscape,
 602                             'outfile=s'  => \$opt_outfile,
 603                             'podpath=s'  => \$opt_podpath,
 604                             'podroot=s'  => \$opt_podroot,
 605                             'recurse!'   => \$opt_recurse,
 606                             'title=s'    => \$opt_title,
 607                             'header'     => \$opt_header,
 608                             'css=s'      => \$opt_css,
 609                             'verbose'    => \$opt_verbose,
 610                             'quiet'      => \$opt_quiet,
 611                            );
 612     usage("-", "invalid parameters") if not $result;
 613
 614     usage("-") if defined $opt_help;    # see if the user asked for help
 615     $opt_help = "";                     # just to make -w shut-up.
 616
 617     $podfile  = $opt_infile if defined $opt_infile;
 618     $htmlfile = $opt_outfile if defined $opt_outfile;
 619     $htmldir  = $opt_htmldir if defined $opt_outfile;
 620
 621     @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 622     @libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 623
 624     warn "Flushing item and directory caches\n"
 625         if $opt_verbose && defined $opt_flush;
 626     unlink($dircache, $itemcache) if defined $opt_flush;
 627
 628     $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 629     $podroot  = $opt_podroot if defined $opt_podroot;
 630
 631     $doindex  = $opt_index if defined $opt_index;
 632     $recurse  = $opt_recurse if defined $opt_recurse;
 633     $title    = $opt_title if defined $opt_title;
 634     $header   = defined $opt_header ? 1 : 0;
 635     $css      = $opt_css if defined $opt_css;
 636     $verbose  = defined $opt_verbose ? 1 : 0;
 637     $quiet    = defined $opt_quiet ? 1 : 0;
 638     $netscape = $opt_netscape if defined $opt_netscape;
 639 }
 640
 641
 642 my $saved_cache_key;
 643
 644 sub get_cache {
 645     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 646     my @cache_key_args = @_;
 647
 648     # A first-level cache:
 649     # Don't bother reading the cache files if they still apply
 650     # and haven't changed since we last read them.
 651
 652     my $this_cache_key = cache_key(@cache_key_args);
 653
 654     return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
 655
 656     # load the cache of %pages and %items if possible.  $tests will be
 657     # non-zero if successful.
 658     my $tests = 0;
 659     if (-f $dircache && -f $itemcache) {
 660         warn "scanning for item cache\n" if $verbose;
 661         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 662     }
 663
 664     # if we didn't succeed in loading the cache then we must (re)build
 665     #  %pages and %items.
 666     if (!$tests) {
 667         warn "scanning directories in pod-path\n" if $verbose;
 668         scan_podpath($podroot, $recurse, 0);
 669     }
 670     $saved_cache_key = cache_key(@cache_key_args);
 671 }
 672
 673 sub cache_key {
 674     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 675     return join('!', $dircache, $itemcache, $recurse,
 676         @$podpath, $podroot, stat($dircache), stat($itemcache));
 677 }
 678
 679 #
 680 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 681 #  are valid caches of %pages and %items.  if they are valid then it loads
 682 #  them and returns a non-zero value.
 683 #
 684 sub load_cache {
 685     my($dircache, $itemcache, $podpath, $podroot) = @_;
 686     my($tests);
 687     local $_;
 688
 689     $tests = 0;
 690
 691     open(CACHE, "<$itemcache") ||
 692         die "$0: error opening $itemcache for reading: $!\n";
 693     $/ = "\n";
 694
 695     # is it the same podpath?
 696     $_ = <CACHE>;
 697     chomp($_);
 698     $tests++ if (join(":", @$podpath) eq $_);
 699
 700     # is it the same podroot?
 701     $_ = <CACHE>;
 702     chomp($_);
 703     $tests++ if ($podroot eq $_);
 704
 705     # load the cache if its good
 706     if ($tests != 2) {
 707         close(CACHE);
 708         return 0;
 709     }
 710
 711     warn "loading item cache\n" if $verbose;
 712     while (<CACHE>) {
 713         /(.*?) (.*)$/;
 714         $items{$1} = $2;
 715     }
 716     close(CACHE);
 717
 718     warn "scanning for directory cache\n" if $verbose;
 719     open(CACHE, "<$dircache") ||
 720         die "$0: error opening $dircache for reading: $!\n";
 721     $/ = "\n";
 722     $tests = 0;
 723
 724     # is it the same podpath?
 725     $_ = <CACHE>;
 726     chomp($_);
 727     $tests++ if (join(":", @$podpath) eq $_);
 728
 729     # is it the same podroot?
 730     $_ = <CACHE>;
 731     chomp($_);
 732     $tests++ if ($podroot eq $_);
 733
 734     # load the cache if its good
 735     if ($tests != 2) {
 736         close(CACHE);
 737         return 0;
 738     }
 739
 740     warn "loading directory cache\n" if $verbose;
 741     while (<CACHE>) {
 742         /(.*?) (.*)$/;
 743         $pages{$1} = $2;
 744     }
 745
 746     close(CACHE);
 747
 748     return 1;
 749 }
 750
 751 #
 752 # scan_podpath - scans the directories specified in @podpath for directories,
 753 #  .pod files, and .pm files.  it also scans the pod files specified in
 754 #  @libpods for =item directives.
 755 #
 756 sub scan_podpath {
 757     my($podroot, $recurse, $append) = @_;
 758     my($pwd, $dir);
 759     my($libpod, $dirname, $pod, @files, @poddata);
 760
 761     unless($append) {
 762         %items = ();
 763         %pages = ();
 764     }
 765
 766     # scan each directory listed in @podpath
 767     $pwd = getcwd();
 768     chdir($podroot)
 769         || die "$0: error changing to directory $podroot: $!\n";
 770     foreach $dir (@podpath) {
 771         scan_dir($dir, $recurse);
 772     }
 773
 774     # scan the pods listed in @libpods for =item directives
 775     foreach $libpod (@libpods) {
 776         # if the page isn't defined then we won't know where to find it
 777         # on the system.
 778         next unless defined $pages{$libpod} && $pages{$libpod};
 779
 780         # if there is a directory then use the .pod and .pm files within it.
 781         # NOTE: Only finds the first so-named directory in the tree.
 782 #       if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 783         if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
 784             #  find all the .pod and .pm files within the directory
 785             $dirname = $1;
 786             opendir(DIR, $dirname) ||
 787                 die "$0: error opening directory $dirname: $!\n";
 788             @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
 789             closedir(DIR);
 790
 791             # scan each .pod and .pm file for =item directives
 792             foreach $pod (@files) {
 793                 open(POD, "<$dirname/$pod") ||
 794                     die "$0: error opening $dirname/$pod for input: $!\n";
 795                 @poddata = <POD>;
 796                 close(POD);
 797                 clean_data( \@poddata );
 798
 799                 scan_items( \%items, "$dirname/$pod", @poddata);
 800             }
 801
 802             # use the names of files as =item directives too.
 803 ### Don't think this should be done this way - confuses issues.(WL)
 804 ###         foreach $pod (@files) {
 805 ###             $pod =~ /^(.*)(\.pod|\.pm)$/;
 806 ###             $items{$1} = "$dirname/$1.html" if $1;
 807 ###         }
 808         } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
 809                  $pages{$libpod} =~ /([^:]*\.pm):/) {
 810             # scan the .pod or .pm file for =item directives
 811             $pod = $1;
 812             open(POD, "<$pod") ||
 813                 die "$0: error opening $pod for input: $!\n";
 814             @poddata = <POD>;
 815             close(POD);
 816             clean_data( \@poddata );
 817
 818             scan_items( \%items, "$pod", @poddata);
 819         } else {
 820             warn "$0: shouldn't be here (line ".__LINE__."\n";
 821         }
 822     }
 823     @poddata = ();      # clean-up a bit
 824
 825     chdir($pwd)
 826         || die "$0: error changing to directory $pwd: $!\n";
 827
 828     # cache the item list for later use
 829     warn "caching items for later use\n" if $verbose;
 830     open(CACHE, ">$itemcache") ||
 831         die "$0: error open $itemcache for writing: $!\n";
 832
 833     print CACHE join(":", @podpath) . "\n$podroot\n";
 834     foreach my $key (keys %items) {
 835         print CACHE "$key $items{$key}\n";
 836     }
 837
 838     close(CACHE);
 839
 840     # cache the directory list for later use
 841     warn "caching directories for later use\n" if $verbose;
 842     open(CACHE, ">$dircache") ||
 843         die "$0: error open $dircache for writing: $!\n";
 844
 845     print CACHE join(":", @podpath) . "\n$podroot\n";
 846     foreach my $key (keys %pages) {
 847         print CACHE "$key $pages{$key}\n";
 848     }
 849
 850     close(CACHE);
 851 }
 852
 853 #
 854 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 855 #  files, and .pm files.  notes those that it finds.  this information will
 856 #  be used later in order to figure out where the pages specified in L<>
 857 #  links are on the filesystem.
 858 #
 859 sub scan_dir {
 860     my($dir, $recurse) = @_;
 861     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 862     local $_;
 863
 864     @subdirs = ();
 865     @pods = ();
 866
 867     opendir(DIR, $dir) ||
 868         die "$0: error opening directory $dir: $!\n";
 869     while (defined($_ = readdir(DIR))) {
 870         if (-d "$dir/$_" && $_ ne "." && $_ ne "..") {      # directory
 871             $pages{$_}  = "" unless defined $pages{$_};
 872             $pages{$_} .= "$dir/$_:";
 873             push(@subdirs, $_);
 874         } elsif (/\.pod$/) {                                # .pod
 875             s/\.pod$//;
 876             $pages{$_}  = "" unless defined $pages{$_};
 877             $pages{$_} .= "$dir/$_.pod:";
 878             push(@pods, "$dir/$_.pod");
 879         } elsif (/\.pm$/) {                                 # .pm
 880             s/\.pm$//;
 881             $pages{$_}  = "" unless defined $pages{$_};
 882             $pages{$_} .= "$dir/$_.pm:";
 883             push(@pods, "$dir/$_.pm");
 884         }
 885     }
 886     closedir(DIR);
 887
 888     # recurse on the subdirectories if necessary
 889     if ($recurse) {
 890         foreach my $subdir (@subdirs) {
 891             scan_dir("$dir/$subdir", $recurse);
 892         }
 893     }
 894 }
 895
 896 #
 897 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
 898 #  build an index.
 899 #
 900 sub scan_headings {
 901     my($sections, @data) = @_;
 902     my($tag, $which_head, $otitle, $listdepth, $index);
 903
 904     # here we need      local $ignore = 0;
 905     #  unfortunately, we can't have it, because $ignore is lexical
 906     $ignore = 0;
 907
 908     $listdepth = 0;
 909     $index = "";
 910
 911     # scan for =head directives, note their name, and build an index
 912     #  pointing to each of them.
 913     foreach my $line (@data) {
 914         if ($line =~ /^=(head)([1-6])\s+(.*)/) {
 915             ($tag, $which_head, $otitle) = ($1,$2,$3);
 916
 917             my $title = depod( $otitle );
 918             my $name = htmlify( $title );
 919             $$sections{$name} = 1;
 920             $title = process_text( \$otitle );
 921
 922             while ($which_head != $listdepth) {
 923                 if ($which_head > $listdepth) {
 924                     $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
 925                     $listdepth++;
 926                 } elsif ($which_head < $listdepth) {
 927                     $listdepth--;
 928                     $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 929                 }
 930             }
 931
 932             $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
 933                       "<A HREF=\"#" . $name . "\">" .
 934                       $title . "</A></LI>";
 935         }
 936     }
 937
 938     # finish off the lists
 939     while ($listdepth--) {
 940         $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 941     }
 942
 943     # get rid of bogus lists
 944     $index =~ s,\t*<UL>\s*</UL>\n,,g;
 945
 946     $ignore = 1;        # restore old value;
 947
 948     return $index;
 949 }
 950
 951 #
 952 # scan_items - scans the pod specified by $pod for =item directives.  we
 953 #  will use this information later on in resolving C<> links.
 954 #
 955 sub scan_items {
 956     my( $itemref, $pod, @poddata ) = @_;
 957     my($i, $item);
 958     local $_;
 959
 960     $pod =~ s/\.pod$//;
 961     $pod .= ".html" if $pod;
 962
 963     foreach $i (0..$#poddata) {
 964         my $txt = depod( $poddata[$i] );
 965
 966         # figure out what kind of item it is.
 967         # Build string for referencing this item.
 968         if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
 969             next unless $1;
 970             $item = $1;
 971         } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
 972             $item = $1;
 973         } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
 974             $item = $1;
 975         } else {
 976             next;
 977         }
 978         my $fid = fragment_id( $item );
 979         $$itemref{$fid} = "$pod" if $fid;
 980     }
 981 }
 982
 983 #
 984 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
 985 #
 986 sub process_head {
 987     my($tag, $heading, $hasindex) = @_;
 988
 989     # figure out the level of the =head
 990     $tag =~ /head([1-6])/;
 991     my $level = $1;
 992
 993     if( $listlevel ){
 994         warn "$0: $podfile: unterminated list at =head in paragraph $paragraph.  ignoring.\n";
 995         while( $listlevel ){
 996             process_back();
 997         }
 998     }
 999
1000     print HTML "<P>\n";
1001     if( $level == 1 && ! $top ){
1002         print HTML "<A HREF=\"#__index__\"><SMALL>page index</SMALL></A>\n"
1003             if $hasindex;
1004         print HTML "<HR>\n"
1005     }
1006
1007     my $name = htmlify( depod( $heading ) );
1008     my $convert = process_text( \$heading );
1009     print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
1010 }
1011
1012
1013 #
1014 # emit_item_tag - print an =item's text
1015 # Note: The global $EmittedItem is used for inhibiting self-references.
1016 #
1017 my $EmittedItem;
1018
1019 sub emit_item_tag($$$){
1020     my( $otext, $text, $compact ) = @_;
1021     my $item = fragment_id( $text );
1022
1023     $EmittedItem = $item;
1024     ### print STDERR "emit_item_tag=$item ($text)\n";
1025
1026     print HTML '<STRONG>';
1027     if ($items_named{$item}++) {
1028         print HTML process_text( \$otext );
1029     } else {
1030         my $name = 'item_' . $item;
1031         print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';
1032     }
1033     print HTML "</STRONG><BR>\n";
1034     undef( $EmittedItem );
1035 }
1036
1037 sub emit_li {
1038     my( $tag ) = @_;
1039     if( $items_seen[$listlevel]++ == 0 ){
1040         push( @listend, "</$tag>" );
1041         print HTML "<$tag>\n";
1042     }
1043     print HTML $tag eq 'DL' ? '<DT>' : '<LI>';
1044 }
1045
1046 #
1047 # process_item - convert a pod item tag and convert it to HTML format.
1048 #
1049 sub process_item {
1050     my( $otext ) = @_;
1051
1052     # lots of documents start a list without doing an =over.  this is
1053     # bad!  but, the proper thing to do seems to be to just assume
1054     # they did do an =over.  so warn them once and then continue.
1055     if( $listlevel == 0 ){
1056         warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n";
1057         process_over();
1058     }
1059
1060     # formatting: insert a paragraph if preceding item has >1 paragraph
1061     if( $after_lpar ){
1062         print HTML "<P></P>\n";
1063         $after_lpar = 0;
1064     }
1065
1066     # remove formatting instructions from the text
1067     my $text = depod( $otext );
1068
1069     # all the list variants:
1070     if( $text =~ /\A\*/ ){ # bullet
1071         emit_li( 'UL' );
1072         if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1073             my $tag = $1;
1074             $otext =~ s/\A\*\s+//;
1075             emit_item_tag( $otext, $tag, 1 );
1076         }
1077
1078     } elsif( $text =~ /\A\d+/ ){ # numbered list
1079         emit_li( 'OL' );
1080         if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1081             my $tag = $1;
1082             $otext =~ s/\A\d+\.?\s*//;
1083             emit_item_tag( $otext, $tag, 1 );
1084         }
1085
1086     } else {                    # definition list
1087         emit_li( 'DL' );
1088         if ($text =~ /\A(.+)\Z/s ){ # should have text
1089             emit_item_tag( $otext, $text, 1 );
1090         }
1091        print HTML '<DD>';
1092     }
1093     print HTML "\n";
1094 }
1095
1096 #
1097 # process_over - process a pod over tag and start a corresponding HTML list.
1098 #
1099 sub process_over {
1100     # start a new list
1101     $listlevel++;
1102     push( @items_seen, 0 );
1103     $after_lpar = 0;
1104 }
1105
1106 #
1107 # process_back - process a pod back tag and convert it to HTML format.
1108 #
1109 sub process_back {
1110     if( $listlevel == 0 ){
1111         warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n";
1112         return;
1113     }
1114
1115     # close off the list.  note, I check to see if $listend[$listlevel] is
1116     # defined because an =item directive may have never appeared and thus
1117     # $listend[$listlevel] may have never been initialized.
1118     $listlevel--;
1119     if( defined $listend[$listlevel] ){
1120         print HTML '<P></P>' if $after_lpar;
1121         print HTML $listend[$listlevel];
1122         print HTML "\n";
1123         pop( @listend );
1124     }
1125     $after_lpar = 0;
1126
1127     # clean up item count
1128     pop( @items_seen );
1129 }
1130
1131 #
1132 # process_cut - process a pod cut tag, thus start ignoring pod directives.
1133 #
1134 sub process_cut {
1135     $ignore = 1;
1136 }
1137
1138 #
1139 # process_pod - process a pod pod tag, thus stop ignoring pod directives
1140 # until we see a corresponding cut.
1141 #
1142 sub process_pod {
1143     # no need to set $ignore to 0 cause the main loop did it
1144 }
1145
1146 #
1147 # process_for - process a =for pod tag.  if it's for html, spit
1148 # it out verbatim, if illustration, center it, otherwise ignore it.
1149 #
1150 sub process_for {
1151     my($whom, $text) = @_;
1152     if ( $whom =~ /^(pod2)?html$/i) {
1153         print HTML $text;
1154     } elsif ($whom =~ /^illustration$/i) {
1155         1 while chomp $text;
1156         for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1157           $text .= $ext, last if -r "$text$ext";
1158         }
1159         print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1160     }
1161 }
1162
1163 #
1164 # process_begin - process a =begin pod tag.  this pushes
1165 # whom we're beginning on the begin stack.  if there's a
1166 # begin stack, we only print if it us.
1167 #
1168 sub process_begin {
1169     my($whom, $text) = @_;
1170     $whom = lc($whom);
1171     push (@begin_stack, $whom);
1172     if ( $whom =~ /^(pod2)?html$/) {
1173         print HTML $text if $text;
1174     }
1175 }
1176
1177 #
1178 # process_end - process a =end pod tag.  pop the
1179 # begin stack.  die if we're mismatched.
1180 #
1181 sub process_end {
1182     my($whom, $text) = @_;
1183     $whom = lc($whom);
1184     if ($begin_stack[-1] ne $whom ) {
1185         die "Unmatched begin/end at chunk $paragraph\n"
1186     }
1187     pop( @begin_stack );
1188 }
1189
1190 #
1191 # process_pre - indented paragraph, made into <PRE></PRE>
1192 #
1193 sub process_pre {
1194     my( $text ) = @_;
1195     my( $rest );
1196     return if $ignore;
1197
1198     $rest = $$text;
1199
1200     # insert spaces in place of tabs
1201     $rest =~ s#.*#
1202             my $line = $&;
1203             1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1204             $line;
1205         #eg;
1206
1207     # convert some special chars to HTML escapes
1208     $rest =~ s/&/&amp;/g;
1209     $rest =~ s/</&lt;/g;
1210     $rest =~ s/>/&gt;/g;
1211     $rest =~ s/"/&quot;/g;
1212
1213     # try and create links for all occurrences of perl.* within
1214     # the preformatted text.
1215     $rest =~ s{
1216                  (\s*)(perl\w+)
1217               }{
1218                  if ( defined $pages{$2} ){     # is a link
1219                      qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1220                  } elsif (defined $pages{dosify($2)}) { # is a link
1221                      qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1222                  } else {
1223                      "$1$2";
1224                  }
1225               }xeg;
1226      $rest =~ s{
1227                  (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1228                }{
1229                   my $url ;
1230                   if ( $htmlfileurl ne '' ){
1231                      # Here, we take advantage of the knowledge
1232                      # that $htmlfileurl ne '' implies $htmlroot eq ''.
1233                      # Since $htmlroot eq '', we need to prepend $htmldir
1234                      # on the fron of the link to get the absolute path
1235                      # of the link's target. We check for a leading '/'
1236                      # to avoid corrupting links that are #, file:, etc.
1237                      my $old_url = $3 ;
1238                      $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1239                      $url = relativize_url( "$old_url.html", $htmlfileurl );
1240                   } else {
1241                      $url = "$3.html" ;
1242                   }
1243                   "$1$url" ;
1244                }xeg;
1245
1246     # Look for embedded URLs and make them into links.  We don't
1247     # relativize them since they are best left as the author intended.
1248
1249     my $urls = '(' . join ('|', qw{
1250                 http
1251                 telnet
1252                 mailto
1253                 news
1254                 gopher
1255                 file
1256                 wais
1257                 ftp
1258             } )
1259         . ')';
1260
1261     my $ltrs = '\w';
1262     my $gunk = '/#~:.?+=&%@!\-';
1263     my $punc = '.:?\-';
1264     my $any  = "${ltrs}${gunk}${punc}";
1265
1266     $rest =~ s{
1267         \b                          # start at word boundary
1268         (                           # begin $1  {
1269           $urls     :               # need resource and a colon
1270           (?!:)                     # Ignore File::, among others.
1271           [$any] +?                 # followed by on or more
1272                                     #  of any valid character, but
1273                                     #  be conservative and take only
1274                                     #  what you need to....
1275         )                           # end   $1  }
1276         (?=                         # look-ahead non-consumptive assertion
1277                 [$punc]*            # either 0 or more puntuation
1278                 [^$any]             #   followed by a non-url char
1279             |                       # or else
1280                 $                   #   then end of the string
1281         )
1282       }{<A HREF="$1">$1</A>}igox;
1283
1284     # text should be as it is (verbatim)
1285     $$text = $rest;
1286 }
1287
1288
1289 #
1290 # pure text processing
1291 #
1292 # pure_text/inIS_text: differ with respect to automatic C<> recognition.
1293 # we don't want this to happen within IS
1294 #
1295 sub pure_text($){
1296     my $text = shift();
1297     process_puretext( $text, \$ptQuote, 1 );
1298 }
1299
1300 sub inIS_text($){
1301     my $text = shift();
1302     process_puretext( $text, \$ptQuote, 0 );
1303 }
1304
1305 #
1306 # process_puretext - process pure text (without pod-escapes) converting
1307 #  double-quotes and handling implicit C<> links.
1308 #
1309 sub process_puretext {
1310     my($text, $quote, $notinIS) = @_;
1311
1312     ## Guessing at func() or [$@%&]*var references in plain text is destined
1313     ## to produce some strange looking ref's. uncomment to disable:
1314     ## $notinIS = 0;
1315
1316     my(@words, $lead, $trail);
1317
1318     # convert double-quotes to single-quotes
1319     if( $$quote && $text =~ s/"/''/s ){
1320         $$quote = 0;
1321     }
1322     while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1323     $$quote = 1 if $text =~ s/"/``/s;
1324
1325     # keep track of leading and trailing white-space
1326     $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");
1327     $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
1328
1329     # split at space/non-space boundaries
1330     @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
1331
1332     # process each word individually
1333     foreach my $word (@words) {
1334         # skip space runs
1335         next if $word =~ /^\s*$/;
1336         # see if we can infer a link
1337         if( $notinIS && $word =~ s/^(\w+)\((.*)\)\W*$/$1/ ) {
1338             # has parenthesis so should have been a C<> ref
1339             ## try for a pagename (perlXXX(1))?
1340             if( $2 =~ /^\d+$/ ){
1341                 my $url = page_sect( $word, '' );
1342                 if( defined $url ){
1343                     $word = "<A HREF=\"$url\">the $word manpage</A>";
1344                     next;
1345                 }
1346             }
1347             $word = emit_C( $word );
1348
1349 #### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1350 ##      } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1351 ##          # perl variables, should be a C<> ref
1352 ##          $word = emit_C( $word );
1353
1354         } elsif ($word =~ m,^\w+://\w,) {
1355             # looks like a URL
1356             # Don't relativize it: leave it as the author intended
1357             $word = qq(<A HREF="$word">$word</A>);
1358         } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1359             # looks like an e-mail address
1360             my ($w1, $w2, $w3) = ("", $word, "");
1361             ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1362             ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1363             $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1364         } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) {  # all uppercase?
1365             $word = html_escape($word) if $word =~ /["&<>]/;
1366             $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1367         } else {
1368             $word = html_escape($word) if $word =~ /["&<>]/;
1369         }
1370     }
1371
1372     # put everything back together
1373     return $lead . join( '', @words ) . $trail;
1374 }
1375
1376
1377 #
1378 # process_text - handles plaintext that appears in the input pod file.
1379 # there may be pod commands embedded within the text so those must be
1380 # converted to html commands.
1381 #
1382
1383 sub process_text1($$;$);
1384
1385 sub process_text {
1386     return if $ignore;
1387     my( $tref ) = @_;
1388     my $res = process_text1( 0, $tref );
1389     $$tref = $res;
1390 }
1391
1392 sub process_text1($$;$){
1393     my( $lev, $rstr, $func ) = @_;
1394     $lev++ unless defined $func;
1395     my $res = '';
1396
1397     if( $func eq 'B' ){
1398         # B<text> - boldface
1399         $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
1400
1401     } elsif( $func eq 'C' ){
1402         # C<code> - can be a ref or <CODE></CODE>
1403         # need to extract text
1404         my $par = go_ahead( $rstr, 'C' );
1405
1406         ## clean-up of the link target
1407         my $text = depod( $par );
1408
1409         ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1410         ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1411
1412         $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1413
1414     } elsif( $func eq 'E' ){
1415         # E<x> - convert to character
1416         $$rstr =~ s/^(\w+)>//;
1417         $res = "&$1;";
1418
1419     } elsif( $func eq 'F' ){
1420         # F<filename> - italizice
1421         $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1422
1423     } elsif( $func eq 'I' ){
1424         # I<text> - italizice
1425         $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1426
1427     } elsif( $func eq 'L' ){
1428         # L<link> - link
1429         ## L<text|cross-ref> => produce text, use cross-ref for linking
1430         ## L<cross-ref> => make text from cross-ref
1431         ## need to extract text
1432         my $par = go_ahead( $rstr, 'L' );
1433
1434         # some L<>'s that shouldn't be:
1435         # a) full-blown URL's are emitted as-is
1436         if( $par =~ m{^\w+://}s ){
1437             return make_URL_href( $par );
1438         }
1439         # b) C<...> is stripped and treated as C<>
1440         if( $par =~ /^C<(.*)>$/ ){
1441             my $text = depod( $1 );
1442             return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1443         }
1444
1445         # analyze the contents
1446         $par =~ s/\n/ /g;   # undo word-wrapped tags
1447         my $opar = $par;
1448         my $linktext;
1449         if( $par =~ s{^([^|]+)\|}{} ){
1450             $linktext = $1;
1451         }
1452
1453         # make sure sections start with a /
1454         $par =~ s{^"}{/"};
1455
1456         my( $page, $section, $ident );
1457
1458         # check for link patterns
1459         if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){     # name/ident
1460             # we've got a name/ident (no quotes)
1461             ( $page, $ident ) = ( $1, $2 );
1462             ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1463
1464         } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1465             # even though this should be a "section", we go for ident first
1466             ( $page, $ident ) = ( $1, $2 );
1467             ### print STDERR "--> L<$par> to page $page, section $section\n";
1468
1469         } elsif( $par =~ /\s/ ){  # this must be a section with missing quotes
1470             ( $page, $section ) = ( '', $par );
1471             ### print STDERR "--> L<$par> to void page, section $section\n";
1472
1473         } else {
1474             ( $page, $section ) = ( $par, '' );
1475             ### print STDERR "--> L<$par> to page $par, void section\n";
1476         }
1477
1478         # now, either $section or $ident is defined. the convoluted logic
1479         # below tries to resolve L<> according to what the user specified.
1480         # failing this, we try to find the next best thing...
1481         my( $url, $ltext, $fid );
1482
1483         RESOLVE: {
1484             if( defined $ident ){
1485                 ## try to resolve $ident as an item
1486                 ( $url, $fid ) = coderef( $page, $ident );
1487                 if( $url ){
1488                     if( ! defined( $linktext ) ){
1489                         $linktext = $ident;
1490                         $linktext .= " in " if $ident && $page;
1491                         $linktext .= "the $page manpage" if $page;
1492                     }
1493                     ###  print STDERR "got coderef url=$url\n";
1494                     last RESOLVE;
1495                 }
1496                 ## no luck: go for a section (auto-quoting!)
1497                 $section = $ident;
1498             }
1499             ## now go for a section
1500             my $htmlsection = htmlify( $section );
1501             $url = page_sect( $page, $htmlsection );
1502             if( $url ){
1503                 if( ! defined( $linktext ) ){
1504                     $linktext = $section;
1505                     $linktext .= " in " if $section && $page;
1506                     $linktext .= "the $page manpage" if $page;
1507                 }
1508                 ### print STDERR "got page/section url=$url\n";
1509                 last RESOLVE;
1510             }
1511             ## no luck: go for an ident
1512             if( $section ){
1513                 $ident = $section;
1514             } else {
1515                 $ident = $page;
1516                 $page  = undef();
1517             }
1518             ( $url, $fid ) = coderef( $page, $ident );
1519             if( $url ){
1520                 if( ! defined( $linktext ) ){
1521                     $linktext = $ident;
1522                     $linktext .= " in " if $ident && $page;
1523                     $linktext .= "the $page manpage" if $page;
1524                 }
1525                 ### print STDERR "got section=>coderef url=$url\n";
1526                 last RESOLVE;
1527             }
1528
1529             # warning; show some text.
1530             $linktext = $opar unless defined $linktext;
1531             warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1532         }
1533
1534         # now we have an URL or just plain code
1535         $$rstr = $linktext . '>' . $$rstr;
1536         if( defined( $url ) ){
1537             $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
1538         } else {
1539             $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1540         }
1541
1542     } elsif( $func eq 'S' ){
1543         # S<text> - non-breaking spaces
1544         $res = process_text1( $lev, $rstr );
1545         $res =~ s/ /&nbsp;/g;
1546
1547     } elsif( $func eq 'X' ){
1548         # X<> - ignore
1549         $$rstr =~ s/^[^>]*>//;
1550
1551     } elsif( $func eq 'Z' ){
1552         # Z<> - empty
1553         warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1554             unless $$rstr =~ s/^>//;
1555
1556     } else {
1557         while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
1558             # all others: either recurse into new function or
1559             # terminate at closing angle bracket
1560             my $pt = $1;
1561             $pt .= '>' if $2 eq '>' &&  $lev == 1;
1562             $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1563             return $res if $2 eq '>' && $lev > 1;
1564             if( $2 ne '>' ){
1565                 $res .= process_text1( $lev, $rstr, substr($2,0,1) );
1566             }
1567
1568         }
1569         if( $lev == 1 ){
1570             $res .= pure_text( $$rstr );
1571         } else {
1572             warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1573         }
1574     }
1575     return $res;
1576 }
1577
1578 #
1579 # go_ahead: extract text of an IS (can be nested)
1580 #
1581 sub go_ahead($$){
1582     my( $rstr, $func ) = @_;
1583     my $res = '';
1584     my $level = 1;
1585     while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
1586         $res .= $1;
1587         if( $2 eq '>' ){
1588             return $res if --$level == 0;
1589         } else {
1590             ++$level;
1591         }
1592         $res .= $2;
1593     }
1594     warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1595     return $res;
1596 }
1597
1598 #
1599 # emit_C - output result of C<text>
1600 #    $text is the depod-ed text
1601 #
1602 sub emit_C($;$){
1603     my( $text, $nocode ) = @_;
1604     my $res;
1605     my( $url, $fid ) = coderef( undef(), $text );
1606
1607     # need HTML-safe text
1608     my $linktext = html_escape( $text );
1609
1610     if( defined( $url ) &&
1611         (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1612         $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>";
1613     } elsif( 0 && $nocode ){
1614         $res = $linktext;
1615     } else {
1616         $res = "<CODE>$linktext</CODE>";
1617     }
1618     return $res;
1619 }
1620
1621 #
1622 # html_escape: make text safe for HTML
1623 #
1624 sub html_escape {
1625     my $rest = $_[0];
1626     $rest   =~ s/&/&amp;/g;
1627     $rest   =~ s/</&lt;/g;
1628     $rest   =~ s/>/&gt;/g;
1629     $rest   =~ s/"/&quot;/g;
1630     return $rest;
1631 }
1632
1633
1634 #
1635 # dosify - convert filenames to 8.3
1636 #
1637 sub dosify {
1638     my($str) = @_;
1639     return lc($str) if $^O eq 'VMS';     # VMS just needs casing
1640     if ($Is83) {
1641         $str = lc $str;
1642         $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1643         $str =~ s/(\w+)/substr ($1,0,8)/ge;
1644     }
1645     return $str;
1646 }
1647
1648 #
1649 # page_sect - make an URL from the text of a L<>
1650 #
1651 sub page_sect($$) {
1652     my( $page, $section ) = @_;
1653     my( $linktext, $page83, $link);     # work strings
1654
1655     # check if we know that this is a section in this page
1656     if (!defined $pages{$page} && defined $sections{$page}) {
1657         $section = $page;
1658         $page = "";
1659         ### print STDERR "reset page='', section=$section\n";
1660     }
1661
1662     $page83=dosify($page);
1663     $page=$page83 if (defined $pages{$page83});
1664     if ($page eq "") {
1665         $link = "#" . htmlify( $section );
1666     } elsif ( $page =~ /::/ ) {
1667         $page =~ s,::,/,g;
1668         # Search page cache for an entry keyed under the html page name,
1669         # then look to see what directory that page might be in.  NOTE:
1670         # this will only find one page. A better solution might be to produce
1671         # an intermediate page that is an index to all such pages.
1672         my $page_name = $page ;
1673         $page_name =~ s,^.*/,, ;
1674         if ( defined( $pages{ $page_name } ) &&
1675              $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1676            ) {
1677             $page = $1 ;
1678         }
1679         else {
1680             # NOTE: This branch assumes that all A::B pages are located in
1681             # $htmlroot/A/B.html . This is often incorrect, since they are
1682             # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1683             # analyze the contents of %pages and figure out where any
1684             # cousins of A::B are, then assume that.  So, if A::B isn't found,
1685             # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1686             # lib/A/B.pm. This is also limited, but it's an improvement.
1687             # Maybe a hints file so that the links point to the correct places
1688             # nonetheless?
1689
1690         }
1691         $link = "$htmlroot/$page.html";
1692         $link .= "#" . htmlify( $section ) if ($section);
1693     } elsif (!defined $pages{$page}) {
1694         $link = "";
1695     } else {
1696         $section = htmlify( $section ) if $section ne "";
1697         ### print STDERR "...section=$section\n";
1698
1699         # if there is a directory by the name of the page, then assume that an
1700         # appropriate section will exist in the subdirectory
1701 #       if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1702         if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
1703             $link = "$htmlroot/$1/$section.html";
1704             ### print STDERR "...link=$link\n";
1705
1706         # since there is no directory by the name of the page, the section will
1707         # have to exist within a .html of the same name.  thus, make sure there
1708         # is a .pod or .pm that might become that .html
1709         } else {
1710             $section = "#$section" if $section;
1711             ### print STDERR "...section=$section\n";
1712
1713             # check if there is a .pod with the page name
1714             if ($pages{$page} =~ /([^:]*)\.pod:/) {
1715                 $link = "$htmlroot/$1.html$section";
1716             } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1717                 $link = "$htmlroot/$1.html$section";
1718             } else {
1719                 $link = "";
1720             }
1721         }
1722     }
1723
1724     if ($link) {
1725         # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1726         # implies $htmlroot eq ''. This means that the link in question
1727         # needs a prefix of $htmldir if it begins with '/'. The test for
1728         # the initial '/' is done to avoid '#'-only links, and to allow
1729         # for other kinds of links, like file:, ftp:, etc.
1730         my $url ;
1731         if (  $htmlfileurl ne '' ) {
1732             $link = "$htmldir$link" if $link =~ m{^/};
1733             $url = relativize_url( $link, $htmlfileurl );
1734 # print( "  b: [$link,$htmlfileurl,$url]\n" );
1735         }
1736         else {
1737             $url = $link ;
1738         }
1739         return $url;
1740
1741     } else {
1742         return undef();
1743     }
1744 }
1745
1746 #
1747 # relativize_url - convert an absolute URL to one relative to a base URL.
1748 # Assumes both end in a filename.
1749 #
1750 sub relativize_url {
1751     my ($dest,$source) = @_ ;
1752
1753     my ($dest_volume,$dest_directory,$dest_file) =
1754         File::Spec::Unix->splitpath( $dest ) ;
1755     $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1756
1757     my ($source_volume,$source_directory,$source_file) =
1758         File::Spec::Unix->splitpath( $source ) ;
1759     $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1760
1761     my $rel_path = '' ;
1762     if ( $dest ne '' ) {
1763        $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1764     }
1765
1766     if ( $rel_path ne ''                &&
1767          substr( $rel_path, -1 ) ne '/' &&
1768          substr( $dest_file, 0, 1 ) ne '#'
1769         ) {
1770         $rel_path .= "/$dest_file" ;
1771     }
1772     else {
1773         $rel_path .= "$dest_file" ;
1774     }
1775
1776     return $rel_path ;
1777 }
1778
1779
1780 #
1781 # coderef - make URL from the text of a C<>
1782 #
1783 sub coderef($$){
1784     my( $page, $item ) = @_;
1785     my( $url );
1786
1787     my $fid = fragment_id( $item );
1788
1789     if( defined( $page ) ){
1790         # we have been given a $page...
1791         $page =~ s{::}{/}g;
1792
1793         # Do we take it? Item could be a section!
1794         my $base = $items{$fid};
1795         $base =~ s{[^/]*/}{};
1796         if( $base ne "$page.html" ){
1797             ###   print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1798             $page = undef();
1799         }
1800
1801     } else {
1802         # no page - local items precede cached items
1803         if(  exists $local_items{$fid} ){
1804             $page = $local_items{$fid};
1805         } else {
1806             $page = $items{$fid};
1807         }
1808     }
1809
1810     # if there was a pod file that we found earlier with an appropriate
1811     # =item directive, then create a link to that page.
1812     if( defined $page ){
1813         if( $page ){
1814             if( $pages{$page} =~ /([^:.]*)\.[^:]*:/){
1815                 $page = $1 . '.html';
1816             }
1817             my $link = "$htmlroot/$page#item_$fid";
1818
1819             # Here, we take advantage of the knowledge that $htmlfileurl
1820             # ne '' implies $htmlroot eq ''.
1821             if (  $htmlfileurl ne '' ) {
1822                 $link = "$htmldir$link" ;
1823                 $url = relativize_url( $link, $htmlfileurl ) ;
1824             } else {
1825                 $url = $link ;
1826             }
1827         } else {
1828             $url = "#item_" . $fid;
1829         }
1830
1831         confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1832     }
1833     return( $url, $fid );
1834 }
1835
1836
1837
1838 #
1839 # Adapted from Nick Ing-Simmons' PodToHtml package.
1840 sub relative_url {
1841     my $source_file = shift ;
1842     my $destination_file = shift;
1843
1844     my $source = URI::file->new_abs($source_file);
1845     my $uo = URI::file->new($destination_file,$source)->abs;
1846     return $uo->rel->as_string;
1847 }
1848
1849
1850 #
1851 # finish_list - finish off any pending HTML lists.  this should be called
1852 # after the entire pod file has been read and converted.
1853 #
1854 sub finish_list {
1855     while ($listlevel > 0) {
1856         print HTML "</DL>\n";
1857         $listlevel--;
1858     }
1859 }
1860
1861 #
1862 # htmlify - converts a pod section specification to a suitable section
1863 # specification for HTML. Note that we keep spaces and special characters
1864 # except ", ? (Netscape problem) and the hyphen (writer's problem...).
1865 #
1866 sub htmlify {
1867     my( $heading) = @_;
1868     $heading =~ s/(\s+)/ /g;
1869     $heading =~ s/\s+\Z//;
1870     $heading =~ s/\A\s+//;
1871     # The hyphen is a disgrace to the English language.
1872     $heading =~ s/[-"?]//g;
1873     $heading = lc( $heading );
1874     return $heading;
1875 }
1876
1877 #
1878 # depod - convert text by eliminating all interior sequences
1879 # Note: can be called with copy or modify semantics
1880 #
1881 my %E2c;
1882 $E2c{lt} = '<';
1883 $E2c{gt} = '>';
1884 $E2c{sol} = '/';
1885 $E2c{verbar} = '|';
1886
1887 sub depod1($;$);
1888
1889 sub depod($){
1890     my $string;
1891     if( ref( $_[0] ) ){
1892         $string =  ${$_[0]};
1893         ${$_[0]} = depod1( \$string );
1894     } else {
1895         $string =  $_[0];
1896         depod1( \$string );
1897     }
1898 }
1899
1900 sub depod1($;$){
1901   my( $rstr, $func ) = @_;
1902   my $res = '';
1903   if( ! defined( $func ) ){
1904       # skip to next begin of an interior sequence
1905       while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<// ){
1906          # recurse into its text
1907          $res .= $1 . depod1( $rstr, $2 );
1908       }
1909       $res .= $$rstr;
1910   } elsif( $func eq 'E' ){
1911       # E<x> - convert to character
1912       $$rstr =~ s/^(\w+)>//;
1913       $res .= $E2c{$1};
1914   } elsif( $func eq 'X' ){
1915       # X<> - ignore
1916       $$rstr =~ s/^[^>]*>//;
1917   } elsif( $func eq 'Z' ){
1918       # Z<> - empty
1919       $$rstr =~ s/^>//;
1920   } else {
1921       # all others: either recurse into new function or
1922       # terminate at closing angle bracket
1923       while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)// ){
1924           $res .= $1;
1925           last if $2 eq '>';
1926           $res .= depod1( $rstr, substr($2,0,1) );
1927       }
1928       ## If we're here and $2 ne '>': undelimited interior sequence.
1929       ## Ignored, as this is called without proper indication of where we are.
1930       ## Rely on process_text to produce diagnostics.
1931   }
1932   return $res;
1933 }
1934
1935 #
1936 # fragment_id - construct a fragment identifier from:
1937 #   a) =item text
1938 #   b) contents of C<...>
1939 #
1940 my @hc;
1941 sub fragment_id {
1942     my $text = shift();
1943     $text =~ s/\s+\Z//s;
1944     if( $text ){
1945         # a method or function?
1946         return $1 if $text =~ /(\w+)\s*\(/;
1947         return $1 if $text =~ /->\s*(\w+)\s*\(?/;
1948
1949         # a variable name?
1950         return $1 if $text =~ /^([$@%*]\S+)/;
1951
1952         # some pattern matching operator?
1953         return $1 if $text =~ m|^(\w+/).*/\w*$|;
1954
1955         # fancy stuff... like "do { }"
1956         return $1 if $text =~ m|^(\w+)\s*{.*}$|;
1957
1958         # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
1959         # and some funnies with ... Module ...
1960         return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
1961         return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
1962
1963         # text? normalize!
1964         $text =~ s/\s+/_/sg;
1965         $text =~ s{(\W)}{
1966          defined( $hc[ord($1)] ) ? $hc[ord($1)]
1967                  : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
1968         $text = substr( $text, 0, 50 );
1969     } else {
1970         return undef();
1971     }
1972 }
1973
1974 #
1975 # make_URL_href - generate HTML href from URL
1976 # Special treatment for CGI queries.
1977 #
1978 sub make_URL_href($){
1979     my( $url ) = @_;
1980     if( $url !~
1981         s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)?$}{<A HREF="$1$2">$1</A>}i ){
1982         $url = "<A HREF=\"$url\">$url</A>";
1983     }
1984     return $url;
1985 }
1986
1987 1;