lib/Pod/Html.pm

   1 package Pod::Html;
   2
   3 use Pod::Functions;
   4 use Getopt::Long;       # package for handling command-line parameters
   5 use File::Spec::Unix;
   6 require Exporter;
   7 use vars qw($VERSION);
   8 $VERSION = 1.03;
   9 @ISA = Exporter;
  10 @EXPORT = qw(pod2html htmlify);
  11 use Cwd;
  12
  13 use Carp;
  14
  15 use locale;     # make \w work right in non-ASCII lands
  16
  17 use strict;
  18
  19 use Config;
  20
  21 =head1 NAME
  22
  23 Pod::Html - module to convert pod files to HTML
  24
  25 =head1 SYNOPSIS
  26
  27     use Pod::Html;
  28     pod2html([options]);
  29
  30 =head1 DESCRIPTION
  31
  32 Converts files from pod format (see L<perlpod>) to HTML format.  It
  33 can automatically generate indexes and cross-references, and it keeps
  34 a cache of things it knows how to cross-reference.
  35
  36 =head1 ARGUMENTS
  37
  38 Pod::Html takes the following arguments:
  39
  40 =over 4
  41
  42 =item help
  43
  44     --help
  45
  46 Displays the usage message.
  47
  48 =item htmldir
  49
  50     --htmldir=name
  51
  52 Sets the directory in which the resulting HTML file is placed.  This
  53 is used to generate relative links to other files. Not passing this
  54 causes all links to be absolute, since this is the value that tells
  55 Pod::Html the root of the documentation tree.
  56
  57 =item htmlroot
  58
  59     --htmlroot=name
  60
  61 Sets the base URL for the HTML files.  When cross-references are made,
  62 the HTML root is prepended to the URL.
  63
  64 =item infile
  65
  66     --infile=name
  67
  68 Specify the pod file to convert.  Input is taken from STDIN if no
  69 infile is specified.
  70
  71 =item outfile
  72
  73     --outfile=name
  74
  75 Specify the HTML file to create.  Output goes to STDOUT if no outfile
  76 is specified.
  77
  78 =item podroot
  79
  80     --podroot=name
  81
  82 Specify the base directory for finding library pods.
  83
  84 =item podpath
  85
  86     --podpath=name:...:name
  87
  88 Specify which subdirectories of the podroot contain pod files whose
  89 HTML converted forms can be linked-to in cross-references.
  90
  91 =item libpods
  92
  93     --libpods=name:...:name
  94
  95 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
  96
  97 =item netscape
  98
  99     --netscape
 100
 101 Use Netscape HTML directives when applicable.
 102
 103 =item nonetscape
 104
 105     --nonetscape
 106
 107 Do not use Netscape HTML directives (default).
 108
 109 =item index
 110
 111     --index
 112
 113 Generate an index at the top of the HTML file (default behaviour).
 114
 115 =item noindex
 116
 117     --noindex
 118
 119 Do not generate an index at the top of the HTML file.
 120
 121
 122 =item recurse
 123
 124     --recurse
 125
 126 Recurse into subdirectories specified in podpath (default behaviour).
 127
 128 =item norecurse
 129
 130     --norecurse
 131
 132 Do not recurse into subdirectories specified in podpath.
 133
 134 =item title
 135
 136     --title=title
 137
 138 Specify the title of the resulting HTML file.
 139
 140 =item css
 141
 142     --css=stylesheet
 143
 144 Specify the URL of a cascading style sheet.
 145
 146 =item verbose
 147
 148     --verbose
 149
 150 Display progress messages.
 151
 152 =item quiet
 153
 154     --quiet
 155
 156 Don't display I<mostly harmless> warning messages.
 157
 158 =back
 159
 160 =head1 EXAMPLE
 161
 162     pod2html("pod2html",
 163              "--podpath=lib:ext:pod:vms",
 164              "--podroot=/usr/src/perl",
 165              "--htmlroot=/perl/nmanual",
 166              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
 167              "--recurse",
 168              "--infile=foo.pod",
 169              "--outfile=/perl/nmanual/foo.html");
 170
 171 =head1 ENVIRONMENT
 172
  173 Uses $Config{pod2html} to set up default options.
 174
 175 =head1 AUTHOR
 176
 177 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 178
 179 =head1 SEE ALSO
 180
 181 L<perlpod>
 182
 183 =head1 COPYRIGHT
 184
 185 This program is distributed under the Artistic License.
 186
 187 =cut
 188
 189 my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
 190 my $dircache = "pod2htmd$cache_ext";
 191 my $itemcache = "pod2htmi$cache_ext";
 192
 193 my @begin_stack = ();           # begin/end stack
 194
 195 my @libpods = ();               # files to search for links from C<> directives
 196 my $htmlroot = "/";             # http-server base directory from which all
 197                                 #   relative paths in $podpath stem.
 198 my $htmldir = "";               # The directory to which the html pages
 199                                 # will (eventually) be written.
 200 my $htmlfile = "";              # write to stdout by default
 201 my $htmlfileurl = "" ;          # The url that other files would use to
 202                                 # refer to this file.  This is only used
 203                                 # to make relative urls that point to
 204                                 # other files.
 205 my $podfile = "";               # read from stdin by default
 206 my @podpath = ();               # list of directories containing library pods.
 207 my $podroot = ".";              # filesystem base directory from which all
 208                                 #   relative paths in $podpath stem.
 209 my $css = '';                   # Cascading style sheet
 210 my $recurse = 1;                # recurse on subdirectories in $podpath.
 211 my $quiet = 0;                  # not quiet by default
 212 my $verbose = 0;                # not verbose by default
 213 my $doindex = 1;                # non-zero if we should generate an index
 214 my $listlevel = 0;              # current list depth
 215 my @listend = ();               # the text to use to end the list.
 216 my $after_lpar = 0;             # set to true after a par in an =item
 217 my $ignore = 1;                 # whether or not to format text.  we don't
 218                                 #   format text until we hit our first pod
 219                                 #   directive.
 220
 221 my %items_named = ();           # for the multiples of the same item in perlfunc
 222 my @items_seen = ();
 223 my $netscape = 0;               # whether or not to use netscape directives.
 224 my $title;                      # title to give the pod(s)
 225 my $header = 0;                 # produce block header/footer
 226 my $top = 1;                    # true if we are at the top of the doc.  used
 227                                 #   to prevent the first <HR> directive.
 228 my $paragraph;                  # which paragraph we're processing (used
 229                                 #   for error messages)
 230 my $ptQuote = 0;                # status of double-quote conversion
 231 my %pages = ();                 # associative array used to find the location
 232                                 #   of pages referenced by L<> links.
 233 my %sections = ();              # sections within this page
 234 my %items = ();                 # associative array used to find the location
 235                                 #   of =item directives referenced by C<> links
 236 my %local_items = ();           # local items - avoid destruction of %items
 237 my $Is83;                       # is dos with short filenames (8.3)
 238
 239 sub init_globals {
 240 $dircache = "pod2htmd$cache_ext";
 241 $itemcache = "pod2htmi$cache_ext";
 242
 243 @begin_stack = ();              # begin/end stack
 244
 245 @libpods = ();          # files to search for links from C<> directives
 246 $htmlroot = "/";                # http-server base directory from which all
 247                                 #   relative paths in $podpath stem.
 248 $htmlfile = "";         # write to stdout by default
 249 $podfile = "";          # read from stdin by default
 250 @podpath = ();          # list of directories containing library pods.
 251 $podroot = ".";         # filesystem base directory from which all
 252                                 #   relative paths in $podpath stem.
 253 $css = '';                   # Cascading style sheet
 254 $recurse = 1;           # recurse on subdirectories in $podpath.
 255 $quiet = 0;             # not quiet by default
 256 $verbose = 0;           # not verbose by default
 257 $doindex = 1;                   # non-zero if we should generate an index
 258 $listlevel = 0;         # current list depth
 259 @listend = ();          # the text to use to end the list.
 260 $after_lpar = 0;        # set to true after a par in an =item
 261 $ignore = 1;                    # whether or not to format text.  we don't
 262                                 #   format text until we hit our first pod
 263                                 #   directive.
 264
 265 @items_seen = ();
 266 %items_named = ();
 267 $netscape = 0;          # whether or not to use netscape directives.
 268 $header = 0;                    # produce block header/footer
 269 $title = '';                    # title to give the pod(s)
 270 $top = 1;                       # true if we are at the top of the doc.  used
 271                                 #   to prevent the first <HR> directive.
 272 $paragraph = '';                        # which paragraph we're processing (used
 273                                 #   for error messages)
 274 %sections = ();         # sections within this page
 275
 276 # These are not reinitialised here but are kept as a cache.
 277 # See get_cache and related cache management code.
 278 #%pages = ();                   # associative array used to find the location
 279                                 #   of pages referenced by L<> links.
 280 #%items = ();                   # associative array used to find the location
 281                                 #   of =item directives referenced by C<> links
 282 %local_items = ();
 283 $Is83=$^O eq 'dos';
 284 }
 285
 286 #
 287 # clean_data: global clean-up of pod data
 288 #
 289 sub clean_data($){
 290     my( $dataref ) = @_;
 291     my $i;
 292     for( $i = 0; $i <= $#$dataref; $i++ ){
 293         ${$dataref}[$i] =~ s/\s+\Z//;
 294
 295         # have a look for all-space lines
 296         if( ${$dataref}[$i] =~ /^\s+$/m ){
 297             my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
 298             splice( @$dataref, $i, 1, @chunks );
 299         }
 300     }
 301 }
 302
 303
 304 sub pod2html {
 305     local(@ARGV) = @_;
 306     local($/);
 307     local $_;
 308
 309     init_globals();
 310
 311     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 312
 313     # cache of %pages and %items from last time we ran pod2html
 314
 315     #undef $opt_help if defined $opt_help;
 316
 317     # parse the command-line parameters
 318     parse_command_line();
 319
 320     # set some variables to their default values if necessary
 321     local *POD;
 322     unless (@ARGV && $ARGV[0]) {
 323         $podfile  = "-" unless $podfile;        # stdin
 324         open(POD, "<$podfile")
 325                 || die "$0: cannot open $podfile file for input: $!\n";
 326     } else {
 327         $podfile = $ARGV[0];  # XXX: might be more filenames
 328         *POD = *ARGV;
 329     }
 330     $htmlfile = "-" unless $htmlfile;   # stdout
 331     $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
 332     $htmldir =~ s#/$## ;                # so we don't get a //
 333     if (  $htmlroot eq ''
 334        && defined( $htmldir )
 335        && $htmldir ne ''
 336        && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
 337        )
 338     {
 339         # Set the 'base' url for this file, so that we can use it
 340         # as the location from which to calculate relative links
 341         # to other files. If this is '', then absolute links will
 342         # be used throughout.
 343         $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
 344     }
 345
 346     # read the pod a paragraph at a time
 347     warn "Scanning for sections in input file(s)\n" if $verbose;
 348     $/ = "";
 349     my @poddata  = <POD>;
 350     close(POD);
 351     clean_data( \@poddata );
 352
 353     # scan the pod for =head[1-6] directives and build an index
 354     my $index = scan_headings(\%sections, @poddata);
 355
 356     unless($index) {
 357         warn "No headings in $podfile\n" if $verbose;
 358     }
 359
 360     # open the output file
 361     open(HTML, ">$htmlfile")
 362             || die "$0: cannot open $htmlfile file for output: $!\n";
 363
 364     # put a title in the HTML file if one wasn't specified
 365     if ($title eq '') {
 366         TITLE_SEARCH: {
 367             for (my $i = 0; $i < @poddata; $i++) {
 368                 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 369                     for my $para ( @poddata[$i, $i+1] ) {
 370                         last TITLE_SEARCH
 371                             if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
 372                     }
 373                 }
 374
 375             }
 376         }
 377     }
 378     if (!$title and $podfile =~ /\.pod$/) {
 379         # probably a split pod so take first =head[12] as title
 380         for (my $i = 0; $i < @poddata; $i++) {
 381             last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 382         }
 383         warn "adopted '$title' as title for $podfile\n"
 384             if $verbose and $title;
 385     }
 386     if ($title) {
 387         $title =~ s/\s*\(.*\)//;
 388     } else {
 389         warn "$0: no title for $podfile" unless $quiet;
 390         $podfile =~ /^(.*)(\.[^.\/]+)?$/;
 391         $title = ($podfile eq "-" ? 'No Title' : $1);
 392         warn "using $title" if $verbose;
 393     }
 394     my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
 395     $csslink =~ s,\\,/,g;
 396     $csslink =~ s,(/.):,$1|,;
 397
 398     my $block = $header ? <<END_OF_BLOCK : '';
 399 <TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
 400 <TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
 401 <FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
 402 </TD></TR>
 403 </TABLE>
 404 END_OF_BLOCK
 405
 406     print HTML <<END_OF_HEAD;
 407 <HTML>
 408 <HEAD>
 409 <TITLE>$title</TITLE>$csslink
 410 <LINK REV="made" HREF="mailto:$Config{perladmin}">
 411 </HEAD>
 412
 413 <BODY>
 414 $block
 415 END_OF_HEAD
 416
 417     # load/reload/validate/cache %pages and %items
 418     get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
 419
 420     # scan the pod for =item directives
 421     scan_items( \%local_items, "", @poddata);
 422
 423     # put an index at the top of the file.  note, if $doindex is 0 we
 424     # still generate an index, but surround it with an html comment.
 425     # that way some other program can extract it if desired.
 426     $index =~ s/--+/-/g;
 427     print HTML "<A NAME=\"__index__\"></A>\n";
 428     print HTML "<!-- INDEX BEGIN -->\n";
 429     print HTML "<!--\n" unless $doindex;
 430     print HTML $index;
 431     print HTML "-->\n" unless $doindex;
 432     print HTML "<!-- INDEX END -->\n\n";
 433     print HTML "<HR>\n" if $doindex and $index;
 434
 435     # now convert this file
 436     my $after_item;             # set to true after an =item
 437     warn "Converting input file $podfile\n" if $verbose;
 438     foreach my $i (0..$#poddata){
 439         $ptQuote = 0; # status of quote conversion
 440
 441         $_ = $poddata[$i];
 442         $paragraph = $i+1;
 443         if (/^(=.*)/s) {        # is it a pod directive?
 444             $ignore = 0;
 445             $after_item = 0;
 446             $_ = $1;
 447             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 448                 process_begin($1, $2);
 449             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 450                 process_end($1, $2);
 451             } elsif (/^=cut/) {                 # =cut
 452                 process_cut();
 453             } elsif (/^=pod/) {                 # =pod
 454                 process_pod();
 455             } else {
 456                 next if @begin_stack && $begin_stack[-1] ne 'html';
 457
 458                 if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
 459                     process_head( $1, $2, $doindex && $index );
 460                 } elsif (/^=item\s*(.*\S)?/sm) {        # =item text
 461                     warn "$0: $podfile: =item without bullet, number or text"
 462                        . " in paragraph $paragraph.\n" if $1 eq '';
 463                     process_item( $1 );
 464                     $after_item = 1;
 465                 } elsif (/^=over\s*(.*)/) {             # =over N
 466                     process_over();
 467                 } elsif (/^=back/) {            # =back
 468                     process_back();
 469                 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
 470                     process_for($1,$2);
 471                 } else {
 472                     /^=(\S*)\s*/;
 473                     warn "$0: $podfile: unknown pod directive '$1' in "
 474                        . "paragraph $paragraph.  ignoring.\n";
 475                 }
 476             }
 477             $top = 0;
 478         }
 479         else {
 480             next if $ignore;
 481             next if @begin_stack && $begin_stack[-1] ne 'html';
 482             my $text = $_;
 483             if( $text =~ /\A\s+/ ){
 484                 process_pre( \$text );
 485                 print HTML "<PRE>\n$text</PRE>\n";
 486
 487             } else {
 488                 process_text( \$text );
 489
 490                 # experimental: check for a paragraph where all lines
 491                 # have some ...\t...\t...\n pattern
 492                 if( $text =~ /\t/ ){
 493                     my @lines = split( "\n", $text );
 494                     if( @lines > 1 ){
 495                         my $all = 2;
 496                         foreach my $line ( @lines ){
 497                             if( $line =~ /\S/ && $line !~ /\t/ ){
 498                                 $all--;
 499                                 last if $all == 0;
 500                             }
 501                         }
 502                         if( $all > 0 ){
 503                             $text =~ s/\t+/<TD>/g;
 504                             $text =~ s/^/<TR><TD>/gm;
 505                             $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
 506                                     $text . '</TABLE>';
 507                         }
 508                     }
 509                 }
 510                 ## end of experimental
 511
 512                 if( $after_item ){
 513                     print HTML "$text\n";
 514                     $after_lpar = 1;
 515                 } else {
 516                     print HTML "<P>$text</P>\n";
 517                 }
 518             }
 519             $after_item = 0;
 520         }
 521     }
 522
 523     # finish off any pending directives
 524     finish_list();
 525
 526     # link to page index
 527     print HTML "<P><A HREF=\"#__index__\"><SMALL>page index</SMALL></A></P>\n"
 528         if $doindex and $index;
 529
 530     print HTML <<END_OF_TAIL;
 531 $block
 532 </BODY>
 533
 534 </HTML>
 535 END_OF_TAIL
 536
 537     # close the html file
 538     close(HTML);
 539
 540     warn "Finished\n" if $verbose;
 541 }
 542
 543 ##############################################################################
 544
 545 my $usage;                      # see below
 546 sub usage {
 547     my $podfile = shift;
 548     warn "$0: $podfile: @_\n" if @_;
 549     die $usage;
 550 }
 551
 552 $usage =<<END_OF_USAGE;
 553 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 554            --podpath=<name>:...:<name> --podroot=<name>
 555            --libpods=<name>:...:<name> --recurse --verbose --index
 556            --netscape --norecurse --noindex
 557
 558   --flush      - flushes the item and directory caches.
 559   --help       - prints this message.
 560   --htmlroot   - http-server base directory from which all relative paths
 561                  in podpath stem (default is /).
 562   --index      - generate an index at the top of the resulting html
 563                  (default).
 564   --infile     - filename for the pod to convert (input taken from stdin
 565                  by default).
 566   --libpods    - colon-separated list of pages to search for =item pod
 567                  directives in as targets of C<> and implicit links (empty
 568                  by default).  note, these are not filenames, but rather
 569                  page names like those that appear in L<> links.
 570   --netscape   - will use netscape html directives when applicable.
 571   --nonetscape - will not use netscape directives (default).
 572   --outfile    - filename for the resulting html file (output sent to
 573                  stdout by default).
 574   --podpath    - colon-separated list of directories containing library
 575                  pods.  empty by default.
 576   --podroot    - filesystem base directory from which all relative paths
 577                  in podpath stem (default is .).
 578   --noindex    - don't generate an index at the top of the resulting html.
 579   --norecurse  - don't recurse on those subdirectories listed in podpath.
 580   --recurse    - recurse on those subdirectories listed in podpath
 581                  (default behavior).
 582   --title      - title that will appear in resulting html file.
 583   --header     - produce block header/footer
 584   --css        - stylesheet URL
 585   --verbose    - self-explanatory
 586   --quiet      - supress some benign warning messages
 587
 588 END_OF_USAGE
 589
 590 sub parse_command_line {
 591     my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
 592     unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
 593     my $result = GetOptions(
 594                             'flush'      => \$opt_flush,
 595                             'help'       => \$opt_help,
 596                             'htmldir=s'  => \$opt_htmldir,
 597                             'htmlroot=s' => \$opt_htmlroot,
 598                             'index!'     => \$opt_index,
 599                             'infile=s'   => \$opt_infile,
 600                             'libpods=s'  => \$opt_libpods,
 601                             'netscape!'  => \$opt_netscape,
 602                             'outfile=s'  => \$opt_outfile,
 603                             'podpath=s'  => \$opt_podpath,
 604                             'podroot=s'  => \$opt_podroot,
 605                             'recurse!'   => \$opt_recurse,
 606                             'title=s'    => \$opt_title,
 607                             'header'     => \$opt_header,
 608                             'css=s'      => \$opt_css,
 609                             'verbose'    => \$opt_verbose,
 610                             'quiet'      => \$opt_quiet,
 611                            );
 612     usage("-", "invalid parameters") if not $result;
 613
 614     usage("-") if defined $opt_help;    # see if the user asked for help
 615     $opt_help = "";                     # just to make -w shut-up.
 616
 617     $podfile  = $opt_infile if defined $opt_infile;
 618     $htmlfile = $opt_outfile if defined $opt_outfile;
 619     $htmldir  = $opt_htmldir if defined $opt_outfile;
 620
 621     @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 622     @libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 623
 624     warn "Flushing item and directory caches\n"
 625         if $opt_verbose && defined $opt_flush;
 626     unlink($dircache, $itemcache) if defined $opt_flush;
 627
 628     $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 629     $podroot  = $opt_podroot if defined $opt_podroot;
 630
 631     $doindex  = $opt_index if defined $opt_index;
 632     $recurse  = $opt_recurse if defined $opt_recurse;
 633     $title    = $opt_title if defined $opt_title;
 634     $header   = defined $opt_header ? 1 : 0;
 635     $css      = $opt_css if defined $opt_css;
 636     $verbose  = defined $opt_verbose ? 1 : 0;
 637     $quiet    = defined $opt_quiet ? 1 : 0;
 638     $netscape = $opt_netscape if defined $opt_netscape;
 639 }
 640
 641
 642 my $saved_cache_key;
 643
 644 sub get_cache {
 645     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 646     my @cache_key_args = @_;
 647
 648     # A first-level cache:
 649     # Don't bother reading the cache files if they still apply
 650     # and haven't changed since we last read them.
 651
 652     my $this_cache_key = cache_key(@cache_key_args);
 653
 654     return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
 655
 656     # load the cache of %pages and %items if possible.  $tests will be
 657     # non-zero if successful.
 658     my $tests = 0;
 659     if (-f $dircache && -f $itemcache) {
 660         warn "scanning for item cache\n" if $verbose;
 661         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 662     }
 663
 664     # if we didn't succeed in loading the cache then we must (re)build
 665     #  %pages and %items.
 666     if (!$tests) {
 667         warn "scanning directories in pod-path\n" if $verbose;
 668         scan_podpath($podroot, $recurse, 0);
 669     }
 670     $saved_cache_key = cache_key(@cache_key_args);
 671 }
 672
 673 sub cache_key {
 674     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 675     return join('!', $dircache, $itemcache, $recurse,
 676         @$podpath, $podroot, stat($dircache), stat($itemcache));
 677 }
 678
 679 #
 680 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 681 #  are valid caches of %pages and %items.  if they are valid then it loads
 682 #  them and returns a non-zero value.
 683 #
 684 sub load_cache {
 685     my($dircache, $itemcache, $podpath, $podroot) = @_;
 686     my($tests);
 687     local $_;
 688
 689     $tests = 0;
 690
 691     open(CACHE, "<$itemcache") ||
 692         die "$0: error opening $itemcache for reading: $!\n";
 693     $/ = "\n";
 694
 695     # is it the same podpath?
 696     $_ = <CACHE>;
 697     chomp($_);
 698     $tests++ if (join(":", @$podpath) eq $_);
 699
 700     # is it the same podroot?
 701     $_ = <CACHE>;
 702     chomp($_);
 703     $tests++ if ($podroot eq $_);
 704
 705     # load the cache if its good
 706     if ($tests != 2) {
 707         close(CACHE);
 708         return 0;
 709     }
 710
 711     warn "loading item cache\n" if $verbose;
 712     while (<CACHE>) {
 713         /(.*?) (.*)$/;
 714         $items{$1} = $2;
 715     }
 716     close(CACHE);
 717
 718     warn "scanning for directory cache\n" if $verbose;
 719     open(CACHE, "<$dircache") ||
 720         die "$0: error opening $dircache for reading: $!\n";
 721     $/ = "\n";
 722     $tests = 0;
 723
 724     # is it the same podpath?
 725     $_ = <CACHE>;
 726     chomp($_);
 727     $tests++ if (join(":", @$podpath) eq $_);
 728
 729     # is it the same podroot?
 730     $_ = <CACHE>;
 731     chomp($_);
 732     $tests++ if ($podroot eq $_);
 733
 734     # load the cache if its good
 735     if ($tests != 2) {
 736         close(CACHE);
 737         return 0;
 738     }
 739
 740     warn "loading directory cache\n" if $verbose;
 741     while (<CACHE>) {
 742         /(.*?) (.*)$/;
 743         $pages{$1} = $2;
 744     }
 745
 746     close(CACHE);
 747
 748     return 1;
 749 }
 750
 751 #
 752 # scan_podpath - scans the directories specified in @podpath for directories,
 753 #  .pod files, and .pm files.  it also scans the pod files specified in
 754 #  @libpods for =item directives.
 755 #
 756 sub scan_podpath {
 757     my($podroot, $recurse, $append) = @_;
 758     my($pwd, $dir);
 759     my($libpod, $dirname, $pod, @files, @poddata);
 760
 761     unless($append) {
 762         %items = ();
 763         %pages = ();
 764     }
 765
 766     # scan each directory listed in @podpath
 767     $pwd = getcwd();
 768     chdir($podroot)
 769         || die "$0: error changing to directory $podroot: $!\n";
 770     foreach $dir (@podpath) {
 771         scan_dir($dir, $recurse);
 772     }
 773
 774     # scan the pods listed in @libpods for =item directives
 775     foreach $libpod (@libpods) {
 776         # if the page isn't defined then we won't know where to find it
 777         # on the system.
 778         next unless defined $pages{$libpod} && $pages{$libpod};
 779
 780         # if there is a directory then use the .pod and .pm files within it.
 781         # NOTE: Only finds the first so-named directory in the tree.
 782 #       if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 783         if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
 784             #  find all the .pod and .pm files within the directory
 785             $dirname = $1;
 786             opendir(DIR, $dirname) ||
 787                 die "$0: error opening directory $dirname: $!\n";
 788             @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
 789             closedir(DIR);
 790
 791             # scan each .pod and .pm file for =item directives
 792             foreach $pod (@files) {
 793                 open(POD, "<$dirname/$pod") ||
 794                     die "$0: error opening $dirname/$pod for input: $!\n";
 795                 @poddata = <POD>;
 796                 close(POD);
 797                 clean_data( \@poddata );
 798
 799                 scan_items( \%items, "$dirname/$pod", @poddata);
 800             }
 801
 802             # use the names of files as =item directives too.
 803 ### Don't think this should be done this way - confuses issues.(WL)
 804 ###         foreach $pod (@files) {
 805 ###             $pod =~ /^(.*)(\.pod|\.pm)$/;
 806 ###             $items{$1} = "$dirname/$1.html" if $1;
 807 ###         }
 808         } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
 809                  $pages{$libpod} =~ /([^:]*\.pm):/) {
 810             # scan the .pod or .pm file for =item directives
 811             $pod = $1;
 812             open(POD, "<$pod") ||
 813                 die "$0: error opening $pod for input: $!\n";
 814             @poddata = <POD>;
 815             close(POD);
 816             clean_data( \@poddata );
 817
 818             scan_items( \%items, "$pod", @poddata);
 819         } else {
 820             warn "$0: shouldn't be here (line ".__LINE__."\n";
 821         }
 822     }
 823     @poddata = ();      # clean-up a bit
 824
 825     chdir($pwd)
 826         || die "$0: error changing to directory $pwd: $!\n";
 827
 828     # cache the item list for later use
 829     warn "caching items for later use\n" if $verbose;
 830     open(CACHE, ">$itemcache") ||
 831         die "$0: error open $itemcache for writing: $!\n";
 832
 833     print CACHE join(":", @podpath) . "\n$podroot\n";
 834     foreach my $key (keys %items) {
 835         print CACHE "$key $items{$key}\n";
 836     }
 837
 838     close(CACHE);
 839
 840     # cache the directory list for later use
 841     warn "caching directories for later use\n" if $verbose;
 842     open(CACHE, ">$dircache") ||
 843         die "$0: error open $dircache for writing: $!\n";
 844
 845     print CACHE join(":", @podpath) . "\n$podroot\n";
 846     foreach my $key (keys %pages) {
 847         print CACHE "$key $pages{$key}\n";
 848     }
 849
 850     close(CACHE);
 851 }
 852
 853 #
 854 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 855 #  files, and .pm files.  notes those that it finds.  this information will
 856 #  be used later in order to figure out where the pages specified in L<>
 857 #  links are on the filesystem.
 858 #
 859 sub scan_dir {
 860     my($dir, $recurse) = @_;
 861     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 862     local $_;
 863
 864     @subdirs = ();
 865     @pods = ();
 866
 867     opendir(DIR, $dir) ||
 868         die "$0: error opening directory $dir: $!\n";
 869     while (defined($_ = readdir(DIR))) {
 870         if (-d "$dir/$_" && $_ ne "." && $_ ne "..") {      # directory
 871             $pages{$_}  = "" unless defined $pages{$_};
 872             $pages{$_} .= "$dir/$_:";
 873             push(@subdirs, $_);
 874         } elsif (/\.pod$/) {                                # .pod
 875             s/\.pod$//;
 876             $pages{$_}  = "" unless defined $pages{$_};
 877             $pages{$_} .= "$dir/$_.pod:";
 878             push(@pods, "$dir/$_.pod");
 879         } elsif (/\.pm$/) {                                 # .pm
 880             s/\.pm$//;
 881             $pages{$_}  = "" unless defined $pages{$_};
 882             $pages{$_} .= "$dir/$_.pm:";
 883             push(@pods, "$dir/$_.pm");
 884         }
 885     }
 886     closedir(DIR);
 887
 888     # recurse on the subdirectories if necessary
 889     if ($recurse) {
 890         foreach my $subdir (@subdirs) {
 891             scan_dir("$dir/$subdir", $recurse);
 892         }
 893     }
 894 }
 895
 896 #
 897 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
 898 #  build an index.
 899 #
 900 sub scan_headings {
 901     my($sections, @data) = @_;
 902     my($tag, $which_head, $otitle, $listdepth, $index);
 903
 904     # here we need      local $ignore = 0;
 905     #  unfortunately, we can't have it, because $ignore is lexical
 906     $ignore = 0;
 907
 908     $listdepth = 0;
 909     $index = "";
 910
 911     # scan for =head directives, note their name, and build an index
 912     #  pointing to each of them.
 913     foreach my $line (@data) {
 914         if ($line =~ /^=(head)([1-6])\s+(.*)/) {
 915             ($tag, $which_head, $otitle) = ($1,$2,$3);
 916
 917             my $title = depod( $otitle );
 918             my $name = htmlify( $title );
 919             $$sections{$name} = 1;
 920             $title = process_text( \$otitle );
 921
 922             while ($which_head != $listdepth) {
 923                 if ($which_head > $listdepth) {
 924                     $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
 925                     $listdepth++;
 926                 } elsif ($which_head < $listdepth) {
 927                     $listdepth--;
 928                     $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 929                 }
 930             }
 931
 932             $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
 933                       "<A HREF=\"#" . $name . "\">" .
 934                       $title . "</A></LI>";
 935         }
 936     }
 937
 938     # finish off the lists
 939     while ($listdepth--) {
 940         $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 941     }
 942
 943     # get rid of bogus lists
 944     $index =~ s,\t*<UL>\s*</UL>\n,,g;
 945
 946     $ignore = 1;        # restore old value;
 947
 948     return $index;
 949 }
 950
 951 #
 952 # scan_items - scans the pod specified by $pod for =item directives.  we
 953 #  will use this information later on in resolving C<> links.
 954 #
 955 sub scan_items {
 956     my( $itemref, $pod, @poddata ) = @_;
 957     my($i, $item);
 958     local $_;
 959
 960     $pod =~ s/\.pod$//;
 961     $pod .= ".html" if $pod;
 962
 963     foreach $i (0..$#poddata) {
 964         my $txt = depod( $poddata[$i] );
 965
 966         # figure out what kind of item it is.
 967         # Build string for referencing this item.
 968         if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
 969             next unless $1;
 970             $item = $1;
 971         } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
 972             $item = $1;
 973         } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
 974             $item = $1;
 975         } else {
 976             next;
 977         }
 978         my $fid = fragment_id( $item );
 979         $$itemref{$fid} = "$pod" if $fid;
 980     }
 981 }
 982
 983 #
 984 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
 985 #
 986 sub process_head {
 987     my($tag, $heading, $hasindex) = @_;
 988
 989     # figure out the level of the =head
 990     $tag =~ /head([1-6])/;
 991     my $level = $1;
 992
 993     if( $listlevel ){
 994         warn "$0: $podfile: unterminated list at =head in paragraph $paragraph.  ignoring.\n";
 995         while( $listlevel ){
 996             process_back();
 997         }
 998     }
 999
1000     print HTML "<P>\n";
1001     if( $level == 1 && ! $top ){
1002         print HTML "<A HREF=\"#__index__\"><SMALL>page index</SMALL></A>\n"
1003             if $hasindex;
1004         print HTML "<HR>\n"
1005     }
1006
1007     my $name = htmlify( depod( $heading ) );
1008     my $convert = process_text( \$heading );
1009     print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
1010 }
1011
1012
#
# emit_item_tag - print the text of an =item in <STRONG>.  the first time
#  a given fragment id is seen, the text is also wrapped in a named anchor
#  "item_<id>"; later occurrences get no anchor.
# Note: the file-level $EmittedItem holds the fragment id while the text
#  is being converted (emit_C reads it to inhibit self-references) and is
#  undefined again afterwards.
#
my $EmittedItem;

sub emit_item_tag($$$){
    my( $otext, $text, $compact ) = @_;
    my $item = fragment_id( $text );

    $EmittedItem = $item;

    my $seen_before = $items_named{$item}++;

    print HTML '<STRONG>';
    print HTML qq{<A NAME="item_$item">} unless $seen_before;
    print HTML process_text( \$otext );
    print HTML '</A>' unless $seen_before;
    print HTML "</STRONG><BR>\n";

    undef( $EmittedItem );
}
1036
#
# emit_li - start a list entry.  on the first entry of the current list
#  level the opening <$tag> is printed and its closing tag is pushed on
#  @listend; after that the entry itself is opened: <DT> for a 'DL' list,
#  <LI> for anything else.
#
sub emit_li {
    my( $tag ) = @_;

    unless( $items_seen[$listlevel]++ ){
        push @listend, "</$tag>";
        print HTML "<$tag>\n";
    }

    my $entry = '<LI>';
    $entry = '<DT>' if $tag eq 'DL';
    print HTML $entry;
}
1045
#
# process_item - write a pod =item directive to HTML.  the depod-ed text
#  decides the kind of list: a leading '*' gives a bullet list (UL), a
#  leading digit a numbered list (OL), anything else a definition list
#  (DL).  text following the bullet or number, or the whole text of a
#  definition item, is printed through emit_item_tag.
#
sub process_item {
    my( $otext ) = @_;

    # plenty of documents start a list without an =over.  complain, then
    # carry on as if the =over had been there.
    if( $listlevel == 0 ){
        warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n";
        process_over();
    }

    # formatting: separate from a preceding multi-paragraph item
    if( $after_lpar ){
        print HTML "<P></P>\n";
        $after_lpar = 0;
    }

    # the list kind is decided on the text without formatting codes
    my $text = depod( $otext );

    # first classify the item; $label stays undefined when there is
    # nothing to print after the list marker
    my( $list_tag, $label );
    if( $text =~ /\A\*/ ){                  # bullet
        $list_tag = 'UL';
        if( $text =~ /\A\*\s+(.+)\Z/s ){
            $label = $1;
            $otext =~ s/\A\*\s+//;
        }
    } elsif( $text =~ /\A\d+/ ){            # numbered list
        $list_tag = 'OL';
        if( $text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ){
            $label = $1;
            $otext =~ s/\A\d+\.?\s*//;
        }
    } else {                                # definition list
        $list_tag = 'DL';
        $label = $text if $text =~ /\A(.+)\Z/s;
    }

    # then emit it
    emit_li( $list_tag );
    emit_item_tag( $otext, $label, 1 ) if defined $label;
    print HTML '<DD>' if $list_tag eq 'DL';
    print HTML "\n";
}
1095
#
# process_over - handle a pod =over directive: enter a new list level
#  that has seen no items yet.  nothing is printed here; emit_li opens the
#  HTML list when the first item of the level is emitted.
#
sub process_over {
    push @items_seen, 0;
    $listlevel += 1;
    $after_lpar = 0;
}
1105
#
# process_back - handle a pod =back directive: leave the current list
#  level and print its closing tag.  a =back outside of any list is
#  reported with a warning and otherwise ignored.
#
sub process_back {
    unless( $listlevel ){
        warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n";
        return;
    }

    $listlevel--;

    # the closing tag is only there if an =item directive opened the list,
    # so it may well be undefined.
    my $closer = $listend[$listlevel];
    if( defined $closer ){
        print HTML '<P></P>' if $after_lpar;
        print HTML $closer;
        print HTML "\n";
        pop @listend;
    }
    $after_lpar = 0;

    # drop the item count of the level just left
    pop @items_seen;
}
1130
#
# process_cut - process a pod cut tag, thus start ignoring pod directives.
#  Takes no arguments.  Sets the file-level $ignore flag; while it is set,
#  process_text and process_pre return without converting anything.
#
sub process_cut {
    $ignore = 1;
}
1137
#
# process_pod - process a pod pod tag, thus stop ignoring pod directives
# until we see a corresponding cut.
#  Takes no arguments and deliberately does nothing: the sub exists so
#  that the =pod directive has a handler of its own.
#
sub process_pod {
    # no need to set $ignore to 0 cause the main loop did it
}
1145
#
# process_for - handle a =for paragraph addressed to $whom.  text for
#  "html" or "pod2html" is printed verbatim.  for "illustration" the text
#  is taken as an image file name: trailing newlines are stripped, the
#  first of a list of known extensions that yields a readable file is
#  appended, and a centered <img> is printed.  any other target is ignored.
#
sub process_for {
    my($whom, $text) = @_;

    if ( $whom =~ /^(pod2)?html$/i ) {
        print HTML $text;
    }
    elsif ( $whom =~ /^illustration$/i ) {
        1 while chomp $text;

        # complete the name with the first extension that exists
        my( $found ) = grep { -r "$text$_" }
                       qw[.png .gif .jpeg .jpg .tga .pcl .bmp];
        $text .= $found if defined $found;

        print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
    }
}
1162
#
# process_begin - handle a =begin directive.  the lower-cased target is
#  pushed on @begin_stack; text that comes with the directive is printed
#  only when the target is "html" or "pod2html".
#
sub process_begin {
    my($whom, $text) = @_;

    my $target = lc($whom);
    push @begin_stack, $target;

    print HTML $text
        if $text && $target =~ /^(pod2)?html$/;
}
1176
#
# process_end - handle a =end directive.  the lower-cased target has to
#  match the innermost open =begin, which is then popped off @begin_stack;
#  otherwise we die.
#
sub process_end {
    my($whom, $text) = @_;

    die "Unmatched begin/end at chunk $paragraph\n"
        unless $begin_stack[-1] eq lc($whom);

    pop @begin_stack;
}
1189
#
# process_pre - indented paragraph, made into <PRE></PRE>
#
#  $text is a reference to the paragraph; the referenced string is
#  replaced in place by its HTML form (tabs expanded, special characters
#  escaped, page names and URLs turned into links).  The surrounding
#  <PRE> tags are not written here.  Does nothing while $ignore is set.
#
sub process_pre {
    my( $text ) = @_;
    my( $rest );
    return if $ignore;

    $rest = $$text;

    # insert spaces in place of tabs
    # (done line by line: .* does not cross newlines, so the length of the
    # text before a tab run is the column it starts at)
    $rest =~ s#.*#
            my $line = $&;
            1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
            $line;
        #eg;

    # convert some special chars to HTML escapes
    # ('&' has to go first so the escapes added below are left alone)
    $rest =~ s/&/&amp;/g;
    $rest =~ s/</&lt;/g;
    $rest =~ s/>/&gt;/g;
    $rest =~ s/"/&quot;/g;

    # try and create links for all occurrences of perl.* within
    # the preformatted text.
    # The HREF built here still holds the raw %pages entry; the next
    # substitution turns it into a real URL.
    $rest =~ s{
                 (\s*)(perl\w+)
              }{
                 if ( defined $pages{$2} ){     # is a link
                     qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
                 } elsif (defined $pages{dosify($2)}) { # is a link
                     qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
                 } else {
                     "$1$2";
                 }
              }xeg;
     # rewrite the HREFs made above: an optional leading "xxx:" part is
     # dropped, the "name.pod:" part becomes "name.html" (relativized when
     # $htmlfileurl is set) and an optional trailing "xxx:" part is dropped.
     $rest =~ s{
                 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
               }{
                  my $url ;
                  if ( $htmlfileurl ne '' ){
                     # Here, we take advantage of the knowledge
                     # that $htmlfileurl ne '' implies $htmlroot eq ''.
                     # Since $htmlroot eq '', we need to prepend $htmldir
                     # on the fron of the link to get the absolute path
                     # of the link's target. We check for a leading '/'
                     # to avoid corrupting links that are #, file:, etc.
                     my $old_url = $3 ;
                     $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
                     $url = relativize_url( "$old_url.html", $htmlfileurl );
                  } else {
                     $url = "$3.html" ;
                  }
                  "$1$url" ;
               }xeg;

    # Look for embedded URLs and make them into links.  We don't
    # relativize them since they are best left as the author intended.

    # alternation of the URL schemes that are recognized
    my $urls = '(' . join ('|', qw{
                http
                telnet
                mailto
                news
                gopher
                file
                wais
                ftp
            } )
        . ')';

    # character classes the URL pattern below is assembled from
    my $ltrs = '\w';
    my $gunk = '/#~:.?+=&%@!\-';
    my $punc = '.:?\-';
    my $any  = "${ltrs}${gunk}${punc}";

    # NOTE: /o compiles this pattern, with its interpolated variables,
    # only once.
    $rest =~ s{
        \b                          # start at word boundary
        (                           # begin $1  {
          $urls     :               # need resource and a colon
          (?!:)                     # Ignore File::, among others.
          [$any] +?                 # followed by on or more
                                    #  of any valid character, but
                                    #  be conservative and take only
                                    #  what you need to....
        )                           # end   $1  }
        (?=                         # look-ahead non-consumptive assertion
                [$punc]*            # either 0 or more puntuation
                [^$any]             #   followed by a non-url char
            |                       # or else
                $                   #   then end of the string
        )
      }{<A HREF="$1">$1</A>}igox;

    # text should be as it is (verbatim)
    $$text = $rest;
}
1287
1288
#
# pure text processing
#
# pure_text - convert text that is not inside an interior sequence (IS).
#  hands the text to process_puretext together with the shared quote
#  state $ptQuote, and with automatic C<> recognition switched on.
#
sub pure_text($){
    my( $text ) = @_;
    return process_puretext( $text, \$ptQuote, 1 );
}
1299
#
# inIS_text - convert text found inside an interior sequence (IS).  like
#  pure_text, but automatic C<> recognition is switched off.
#
sub inIS_text($){
    my( $text ) = @_;
    return process_puretext( $text, \$ptQuote, 0 );
}
1304
#
# process_puretext - process pure text (without pod-escapes) converting
#  double-quotes and handling implicit C<> links.
#
#  $text     the text to convert
#  $quote    reference to a flag that is true while a double quote opened
#            in an earlier piece of text is still waiting to be closed
#  $notinIS  true if word(...) should be treated as an implicit C<> or
#            manpage reference
#
#  Returns the converted text; leading and trailing white-space is kept.
#
sub process_puretext {
    my($text, $quote, $notinIS) = @_;

    ## Guessing at func() or [$@%&]*var references in plain text is destined
    ## to produce some strange looking ref's. uncomment to disable:
    ## $notinIS = 0;

    my(@words, $lead, $trail);

    # convert double-quotes to single-quotes
    # (a quote left open by an earlier call is closed first, then all
    # complete pairs are converted, and a lone remaining quote is opened
    # and remembered in $$quote)
    if( $$quote && $text =~ s/"/''/s ){
        $$quote = 0;
    }
    while ($text =~ s/"([^"]*)"/``$1''/sg) {};
    $$quote = 1 if $text =~ s/"/``/s;

    # keep track of leading and trailing white-space
    $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");
    $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");

    # split at space/non-space boundaries
    # (the white-space runs stay in @words so the final join restores them)
    @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );

    # process each word individually
    # ($word aliases the element of @words, so assignments change it in place)
    foreach my $word (@words) {
        # skip space runs
        next if $word =~ /^\s*$/;
        # see if we can infer a link
        # (the substitution reduces $word to the name before the
        # parenthesis; $2 still holds what was between the parentheses)
        if( $notinIS && $word =~ s/^(\w+)\((.*)\)\W*$/$1/ ) {
            # has parenthesis so should have been a C<> ref
            ## try for a pagename (perlXXX(1))?
            if( $2 =~ /^\d+$/ ){
                my $url = page_sect( $word, '' );
                if( defined $url ){
                    $word = "<A HREF=\"$url\">the $word manpage</A>";
                    next;
                }
            }
            $word = emit_C( $word );

#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
##      } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
##          # perl variables, should be a C<> ref
##          $word = emit_C( $word );

        } elsif ($word =~ m,^\w+://\w,) {
            # looks like a URL
            # Don't relativize it: leave it as the author intended
            $word = qq(<A HREF="$word">$word</A>);
        } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
            # looks like an e-mail address
            # ($w1/$w3 take enclosing parentheses or angle brackets, plus a
            # trailing comma, so that only the address itself is linked)
            my ($w1, $w2, $w3) = ("", $word, "");
            ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
            ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
            $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
        } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) {  # all uppercase?
            $word = html_escape($word) if $word =~ /["&<>]/;
            $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
        } else {
            $word = html_escape($word) if $word =~ /["&<>]/;
        }
    }

    # put everything back together
    return $lead . join( '', @words ) . $trail;
}
1375
1376
#
# process_text - convert plain text from the input pod file, including
#  any pod commands embedded in it, to HTML.  $tref is a reference to the
#  text: the referenced string is overwritten with the HTML, which is also
#  returned.  while $ignore is set nothing is done and nothing is returned.
#
sub process_text {
    my( $tref ) = @_;
    return if $ignore;
    return $$tref = process_text1( 0, $tref );
}
1388
#
# process_text1 - the recursive worker behind process_text.
#
#  $lev   nesting level; it is incremented whenever the sub is entered
#         without $func, so plain paragraph text is processed at level 1
#         and the contents of an interior sequence at level 2 and up.
#  $rstr  reference to the text still to be converted; whatever gets
#         converted is removed from the front of the referenced string.
#  $func  optional: the letter of the interior sequence whose opening
#         "X<" has just been consumed.
#
#  returns the HTML for the text consumed.
#
sub process_text1($$;$){
    my( $lev, $rstr, $func ) = @_;
    $lev++ unless defined $func;
    # avoid comparing an undefined value in the dispatch below
    $func = '' unless defined $func;
    my $res = '';

    if( $func eq 'B' ){
        # B<text> - boldface
        $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';

    } elsif( $func eq 'C' ){
        # C<code> - can be a ref or <CODE></CODE>
        # need to extract text
        my $par = go_ahead( $rstr, 'C' );

        ## clean-up of the link target
        my $text = depod( $par );

        ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
        ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";

        $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );

    } elsif( $func eq 'E' ){
        # E<x> - convert to character
        if( $$rstr =~ s/^(\w+)>// ){
            my $escape = $1;
            # a number stands for the character with that code and needs
            # a numeric character reference in HTML; anything else is
            # passed on as an entity name.
            if( $escape =~ /^0x([0-9a-f]+)$/i ){
                $escape = "#x$1";               # 0x.. is hexadecimal
            } elsif( $escape =~ /^0([0-7]+)$/ ){
                $escape = '#' . oct( $1 );      # leading 0 is octal
            } elsif( $escape =~ /^\d+$/ ){
                $escape = "#$escape";           # plain decimal
            }
            $res = "&$escape;";
        } else {
            # nothing usable up to the '>': emit nothing instead of an
            # entity made from a stale capture
            warn "$0: $podfile: invalid E<> in paragraph $paragraph.";
        }

    } elsif( $func eq 'F' ){
        # F<filename> - italizice
        $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';

    } elsif( $func eq 'I' ){
        # I<text> - italizice
        $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';

    } elsif( $func eq 'L' ){
        # L<link> - link
        ## L<text|cross-ref> => produce text, use cross-ref for linking
        ## L<cross-ref> => make text from cross-ref
        ## need to extract text
        my $par = go_ahead( $rstr, 'L' );

        # some L<>'s that shouldn't be:
        # a) full-blown URL's are emitted as-is
        if( $par =~ m{^\w+://}s ){
            return make_URL_href( $par );
        }
        # b) C<...> is stripped and treated as C<>
        if( $par =~ /^C<(.*)>$/ ){
            my $text = depod( $1 );
            return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
        }

        # analyze the contents
        $par =~ s/\n/ /g;   # undo word-wrapped tags
        my $opar = $par;
        my $linktext;
        if( $par =~ s{^([^|]+)\|}{} ){
            $linktext = $1;
        }

        # make sure sections start with a /
        $par =~ s{^"}{/"};

        my( $page, $section, $ident );

        # check for link patterns
        if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){     # name/ident
            # we've got a name/ident (no quotes)
            ( $page, $ident ) = ( $1, $2 );
            ### print STDERR "--> L<$par> to page $page, ident $ident\n";

        } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
            # even though this should be a "section", we go for ident first
            ( $page, $ident ) = ( $1, $2 );
            ### print STDERR "--> L<$par> to page $page, section $section\n";

        } elsif( $par =~ /\s/ ){  # this must be a section with missing quotes
            ( $page, $section ) = ( '', $par );
            ### print STDERR "--> L<$par> to void page, section $section\n";

        } else {
            ( $page, $section ) = ( $par, '' );
            ### print STDERR "--> L<$par> to page $par, void section\n";
        }

        # now, either $section or $ident is defined. the convoluted logic
        # below tries to resolve L<> according to what the user specified.
        # failing this, we try to find the next best thing...
        my( $url, $ltext, $fid );

        RESOLVE: {
            if( defined $ident ){
                ## try to resolve $ident as an item
                ( $url, $fid ) = coderef( $page, $ident );
                if( $url ){
                    if( ! defined( $linktext ) ){
                        $linktext = $ident;
                        $linktext .= " in " if $ident && $page;
                        $linktext .= "the $page manpage" if $page;
                    }
                    ###  print STDERR "got coderef url=$url\n";
                    last RESOLVE;
                }
                ## no luck: go for a section (auto-quoting!)
                $section = $ident;
            }
            ## now go for a section
            my $htmlsection = htmlify( $section );
            $url = page_sect( $page, $htmlsection );
            if( $url ){
                if( ! defined( $linktext ) ){
                    $linktext = $section;
                    $linktext .= " in " if $section && $page;
                    $linktext .= "the $page manpage" if $page;
                }
                ### print STDERR "got page/section url=$url\n";
                last RESOLVE;
            }
            ## no luck: go for an ident
            if( $section ){
                $ident = $section;
            } else {
                $ident = $page;
                $page  = undef();
            }
            ( $url, $fid ) = coderef( $page, $ident );
            if( $url ){
                if( ! defined( $linktext ) ){
                    $linktext = $ident;
                    $linktext .= " in " if $ident && $page;
                    $linktext .= "the $page manpage" if $page;
                }
                ### print STDERR "got section=>coderef url=$url\n";
                last RESOLVE;
            }

            # warning; show some text.
            $linktext = $opar unless defined $linktext;
            warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
        }

        # now we have an URL or just plain code
        # (the link text is pushed back in front of the remaining input,
        # closed by a '>', so that the recursive call converts it)
        $$rstr = $linktext . '>' . $$rstr;
        if( defined( $url ) ){
            $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
        } else {
            $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
        }

    } elsif( $func eq 'S' ){
        # S<text> - non-breaking spaces
        $res = process_text1( $lev, $rstr );
        $res =~ s/ /&nbsp;/g;

    } elsif( $func eq 'X' ){
        # X<> - ignore
        $$rstr =~ s/^[^>]*>//;

    } elsif( $func eq 'Z' ){
        # Z<> - empty
        warn "$0: $podfile: invalid Z<> in paragraph $paragraph."
            unless $$rstr =~ s/^>//;

    } else {
        while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
            # all others: either recurse into new function or
            # terminate at closing angle bracket
            my $pt = $1;
            # at the top level a '>' closes nothing: it is ordinary text
            $pt .= '>' if $2 eq '>' &&  $lev == 1;
            $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
            return $res if $2 eq '>' && $lev > 1;
            if( $2 ne '>' ){
                $res .= process_text1( $lev, $rstr, substr($2,0,1) );
            }

        }
        if( $lev == 1 ){
            # whatever is left holds no more sequences
            $res .= pure_text( $$rstr );
        } else {
            warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
        }
    }
    return $res;
}
1574
#
# go_ahead - extract the text of an interior sequence (IS), which may
#  contain nested sequences.  $rstr refers to the input just after the
#  opening "X<"; everything up to and including the matching '>' is
#  removed from it and the text in between (without that '>') is returned.
#  if the input ends before the sequence is closed, a warning naming $func
#  is issued and what has been collected so far is returned.
#
sub go_ahead($$){
    my( $rstr, $func ) = @_;
    my $collected = '';
    my $depth = 1;

    while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
        my( $before, $token ) = ( $1, $2 );
        $collected .= $before;

        # a '>' closes one level, anything else opens one
        $depth += $token eq '>' ? -1 : 1;
        return $collected unless $depth;

        $collected .= $token;
    }

    warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
    return $collected;
}
1594
#
# emit_C - produce the HTML for C<text>
#    $text is the depod-ed text; it is HTML-escaped and wrapped in <CODE>.
#    if coderef finds a target for it, the result is made a link to that
#    target, unless the target is the item currently being emitted (see
#    $EmittedItem).  $nocode is accepted but has no effect.
#
sub emit_C($;$){
    my( $text, $nocode ) = @_;
    my( $url, $fid ) = coderef( undef(), $text );

    # need HTML-safe text
    my $code = '<CODE>' . html_escape( $text ) . '</CODE>';

    # an item must not link to itself
    my $self_ref = defined( $EmittedItem ) && $EmittedItem eq $fid;

    return qq{<A HREF="$url">$code</A>}
        if defined( $url ) && !$self_ref;
    return $code;
}
1617
#
# html_escape - return a copy of the argument that is safe to put into
#  HTML: & < > and " are replaced by their entities.
#
sub html_escape {
    my( $rest ) = @_;
    my %entity = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    $rest =~ s/([&<>"])/$entity{$1}/g;
    return $rest;
}
1629
1630
#
# dosify - convert filenames to 8.3
#  on VMS the name is merely lower-cased.  elsewhere the name is returned
#  as it is unless $Is83 is set, in which case it is lower-cased, every
#  ".ext" is cut down to the dot plus three characters and every run of
#  word characters to eight.
#
sub dosify {
    my( $name ) = @_;

    return lc( $name ) if $^O eq 'VMS';     # VMS just needs casing
    return $name unless $Is83;

    $name = lc( $name );
    $name =~ s/(\.\w+)/substr( $1, 0, 4 )/ge;
    $name =~ s/(\w+)/substr( $1, 0, 8 )/ge;
    return $name;
}
1644
#
# page_sect - make an URL from the text of a L<>
#
#  $page     name of the page linked to; '' for the current page.  a name
#            that is no known page but is a known section of the current
#            page is taken to be that section.
#  $section  section within the page, may be ''.
#
#  returns the URL (made relative to $htmlfileurl when that is set), or
#  undef if the page cannot be located through %pages.
#
sub page_sect($$) {
    my( $page, $section ) = @_;
    my( $linktext, $page83, $link);     # work strings

    # check if we know that this is a section in this page
    if (!defined $pages{$page} && defined $sections{$page}) {
        $section = $page;
        $page = "";
        ### print STDERR "reset page='', section=$section\n";
    }

    # prefer the 8.3 form of the name when that is what %pages knows
    $page83=dosify($page);
    $page=$page83 if (defined $pages{$page83});
    if ($page eq "") {
        $link = "#" . htmlify( $section );
    } elsif ( $page =~ /::/ ) {
        $page =~ s,::,/,g;
        # Search page cache for an entry keyed under the html page name,
        # then look to see what directory that page might be in.  NOTE:
        # this will only find one page. A better solution might be to produce
        # an intermediate page that is an index to all such pages.
        my $page_name = $page ;
        $page_name =~ s,^.*/,, ;
        # $page comes straight from the pod text: quote it so that it is
        # matched literally and cannot be read as regex syntax.
        if ( defined( $pages{ $page_name } ) &&
             $pages{ $page_name } =~ /([^:]*\Q$page\E)\.(?:pod|pm):/
           ) {
            $page = $1 ;
        }
        else {
            # NOTE: This branch assumes that all A::B pages are located in
            # $htmlroot/A/B.html . This is often incorrect, since they are
            # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
            # analyze the contents of %pages and figure out where any
            # cousins of A::B are, then assume that.  So, if A::B isn't found,
            # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
            # lib/A/B.pm. This is also limited, but it's an improvement.
            # Maybe a hints file so that the links point to the correct places
            # nonetheless?

        }
        $link = "$htmlroot/$page.html";
        $link .= "#" . htmlify( $section ) if ($section);
    } elsif (!defined $pages{$page}) {
        $link = "";
    } else {
        $section = htmlify( $section ) if $section ne "";
        ### print STDERR "...section=$section\n";

        # if there is a directory by the name of the page, then assume that an
        # appropriate section will exist in the subdirectory
#       if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
        if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
            $link = "$htmlroot/$1/$section.html";
            ### print STDERR "...link=$link\n";

        # since there is no directory by the name of the page, the section will
        # have to exist within a .html of the same name.  thus, make sure there
        # is a .pod or .pm that might become that .html
        } else {
            $section = "#$section" if $section;
            ### print STDERR "...section=$section\n";

            # check if there is a .pod with the page name
            if ($pages{$page} =~ /([^:]*)\.pod:/) {
                $link = "$htmlroot/$1.html$section";
            } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
                $link = "$htmlroot/$1.html$section";
            } else {
                $link = "";
            }
        }
    }

    if ($link) {
        # Here, we take advantage of the knowledge that $htmlfileurl ne ''
        # implies $htmlroot eq ''. This means that the link in question
        # needs a prefix of $htmldir if it begins with '/'. The test for
        # the initial '/' is done to avoid '#'-only links, and to allow
        # for other kinds of links, like file:, ftp:, etc.
        my $url ;
        if (  $htmlfileurl ne '' ) {
            $link = "$htmldir$link" if $link =~ m{^/};
            $url = relativize_url( $link, $htmlfileurl );
# print( "  b: [$link,$htmlfileurl,$url]\n" );
        }
        else {
            $url = $link ;
        }
        return $url;

    } else {
        return undef();
    }
}
1742
#
# relativize_url - convert an absolute URL to one relative to a base URL.
# Assumes both end in a filename.
#
# Arguments:  $dest   - URL of the link target
#             $source - URL of the document the link is placed in
# Returns:    $dest rewritten relative to the directory part of $source.
#             A $dest without any directory part (for instance a bare
#             "#fragment") is returned unchanged.
#
sub relativize_url {
    my( $dest, $source ) = @_;
    my $fs = 'File::Spec::Unix';

    # Separate the trailing file name (and any #fragment riding on it)
    # from the directory part of each URL.
    my( $dest_vol, $dest_dir, $leaf ) = $fs->splitpath( $dest );
    my( $src_vol,  $src_dir )         = $fs->splitpath( $source );
    my $dest_base = $fs->catpath( $dest_vol, $dest_dir, '' );
    my $src_base  = $fs->catpath( $src_vol,  $src_dir,  '' );

    # Only a destination that actually has a directory part can be
    # relativized; otherwise the leaf is all there is.
    my $rel_path = $dest_base eq ''
                 ? ''
                 : $fs->abs2rel( $dest_base, $src_base );

    # Re-attach the leaf.  A separating '/' is needed unless the relative
    # path is empty, already ends in '/', or the leaf is a pure fragment.
    my $needs_slash = $rel_path ne ''
                   && substr( $rel_path, -1 ) ne '/'
                   && substr( $leaf, 0, 1 ) ne '#';

    return $needs_slash ? "$rel_path/$leaf" : "$rel_path$leaf";
}
1775
1776
#
# coderef - make URL from the text of a C<>
#
# Arguments:  $page - page named together with the item, or undef
#             $item - the item text; fragment_id() turns it into the anchor
# Returns:    the list ( $url, $fid ).  $url is undef when no page holding
#             the item could be determined; $fid is whatever fragment_id()
#             returned (undef for empty text).
#
sub coderef($$){
    my( $page, $item ) = @_;
    my( $url );

    my $fid = fragment_id( $item );

    # An undefined hash key or interpolated value behaves like '' but
    # warns under -w; use an explicit '' so the result stays the same
    # and the warning goes away.
    my $key = defined( $fid ) ? $fid : '';

    if( defined( $page ) ){
        # we have been given a $page...
        $page =~ s{::}{/}g;

        # Do we take it? Item could be a section!
        # The cached entry minus its leading path component must name
        # the very page we were given.
        my $base = defined( $items{$key} ) ? $items{$key} : '';
        $base =~ s{[^/]*/}{};
        if( $base ne "$page.html" ){
            ###   print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
            $page = undef();
        }

    } else {
        # no page - local items precede cached items
        $page = exists( $local_items{$key} ) ? $local_items{$key}
                                             : $items{$key};
    }

    # if there was a pod file that we found earlier with an appropriate
    # =item directive, then create a link to that page.
    if( defined $page ){
        if( $page ){
            # a page we know nothing about is used as it stands
            if( defined( $pages{$page} ) &&
                $pages{$page} =~ /([^:.]*)\.[^:]*:/ ){
                $page = $1 . '.html';
            }
            my $link = "$htmlroot/$page#item_$key";

            # Here, we take advantage of the knowledge that $htmlfileurl
            # ne '' implies $htmlroot eq ''.
            if (  $htmlfileurl ne '' ) {
                $link = "$htmldir$link" ;
                $url = relativize_url( $link, $htmlfileurl ) ;
            } else {
                $url = $link ;
            }
        } else {
            # defined but false page: the item lives in the current file
            $url = "#item_" . $key;
        }

        # NOTE(review): this only fires when the URL contains a
        # double-quoted span with whitespace inside it -- confirm that
        # this is the intended check.
        confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
    }
    return( $url, $fid );
}
1832
1833
1834
#
# Adapted from Nick Ing-Simmons' PodToHtml package.
#
# relative_url - express $destination_file as a URL relative to
# $source_file, using URI::file objects.
# NOTE(review): no "use URI::file" is visible in this part of the file;
# confirm that it is loaded before this sub is called.
#
sub relative_url {
    my( $source_file, $destination_file ) = @_;

    # absolute URI of the referring file serves as the base
    my $base_uri = URI::file->new_abs( $source_file );

    # resolve the destination against that base, then make it relative
    my $dest_uri = URI::file->new( $destination_file, $base_uri );
    return $dest_uri->abs->rel->as_string;
}
1845
1846
#
# finish_list - finish off any pending HTML lists.  this should be called
# after the entire pod file has been read and converted.
#
# Writes one closing </DL> tag to the HTML filehandle for every list level
# that is still open and counts $listlevel down as it goes.
#
sub finish_list {
    until ($listlevel <= 0) {
        $listlevel--;
        print HTML "</DL>\n";
    }
}
1857
#
# htmlify - converts a pod section specification to a suitable section
# specification for HTML. Note that we keep spaces and special characters
# except ", ? (Netscape problem) and the hyphen (writer's problem...).
#
# Argument:  the heading text
# Returns:   the heading with whitespace runs collapsed to single blanks,
#            leading and trailing whitespace removed, the three characters
#            named above deleted, and everything lowercased.
#
sub htmlify {
    my( $heading ) = @_;
    for( $heading ){
        s/\s+/ /g;      # squeeze runs of whitespace
        s/\s+\Z//;      # trim the end ...
        s/\A\s+//;      # ... and the beginning
        # The hyphen is a disgrace to the English language.
        tr/-"?//d;
    }
    return lc( $heading );
}
1873
#
# depod - convert text by eliminating all interior sequences
# Note: can be called with copy or modify semantics
#
#   depod( $string )   - returns the stripped text, $string is untouched
#   depod( \$string )  - additionally stores the stripped text in $string
#

# the E<> escapes that are translated; any other name yields nothing
my %E2c = (
    lt     => '<',
    gt     => '>',
    sol    => '/',
    verbar => '|',
);

sub depod($){
    my( $arg ) = @_;

    # depod1 consumes the string it is handed, so always work on a copy
    my $text  = ref( $arg ) ? $$arg : $arg;
    my $plain = depod1( \$text );

    $$arg = $plain if ref( $arg );
    return $plain;
}

#
# depod1 - recursive worker for depod
#
# Arguments:  $rstr - reference to the text still to be processed; what
#                     has been dealt with is removed from its front
#             $func - letter of the interior sequence we are inside of,
#                     or undef at the outermost level
# Returns:    the plain text of the part that was consumed.
#
sub depod1($;$){
    my( $rstr, $func ) = @_;
    my $res = '';

    if( ! defined( $func ) ){
        # skip to next begin of an interior sequence
        while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<// ){
            # save the captures before recursing into the sequence text
            my( $plain, $code ) = ( $1, $2 );
            $res .= $plain . depod1( $rstr, $code );
        }
        # whatever is left holds no further sequence
        $res .= $$rstr;

    } elsif( $func eq 'E' ){
        # E<x> - convert to character
        # Look at the capture only if the substitution succeeded,
        # otherwise $1 would still hold the caller's last match.
        if( $$rstr =~ s/^([^>]*)>// ){
            $res .= $E2c{$1} if exists $E2c{$1};
        }

    } elsif( $func eq 'X' ){
        # X<> - ignore
        $$rstr =~ s/^[^>]*>//;

    } elsif( $func eq 'Z' ){
        # Z<> - empty
        $$rstr =~ s/^>//;

    } else {
        # all others: either recurse into new function or
        # terminate at closing angle bracket
        while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)// ){
            my( $plain, $token ) = ( $1, $2 );
            $res .= $plain;
            last if $token eq '>';
            $res .= depod1( $rstr, substr( $token, 0, 1 ) );
        }
        ## If we get here without having seen '>': undelimited interior
        ## sequence.  Ignored, as this is called without proper indication
        ## of where we are.  Rely on process_text to produce diagnostics.
    }
    return $res;
}
1929
#
# fragment_id - construct a fragment identifier from:
#   a) =item text
#   b) contents of C<...>
#
# Returns undef for text that is empty or false once trailing whitespace
# has been removed.  Otherwise the first rule that matches decides:
#   name( ...        -> name
#   ->name           -> name
#   $x @x %x *x      -> the variable including its sigil
#   word/.../flags   -> "word/"
#   word { ... }     -> word
#   func PARAMS      -> func
#   anything else    -> the text with whitespace runs turned into '_' and
#                       non-word characters into %XX, cut to 50 characters
#

# cache of the %XX escapes, indexed by character code
my @hc;

sub fragment_id {
    my $text = shift();
    $text =~ s/\s+\Z//s;
    return undef() unless $text;

    # a method or function?
    return $1 if $text =~ /(\w+)\s*\(/;
    return $1 if $text =~ /->\s*(\w+)\s*\(?/;

    # a variable name?
    # The sigils have to be escaped: an unescaped "$@" inside the class
    # is interpolated as the eval error variable, which removes '$' and
    # '@' from the set of characters that are recognised.
    return $1 if $text =~ /^([\$\@%*]\S+)/;

    # some pattern matching operator?
    return $1 if $text =~ m|^(\w+/).*/\w*$|;

    # fancy stuff... like "do { }"
    return $1 if $text =~ m|^(\w+)\s*{.*}$|;

    # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
    # and some funnies with ... Module ...
    return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
    return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};

    # text? normalize!
    $text =~ s/\s+/_/sg;
    $text =~ s{(\W)}{
        my $code = ord( $1 );
        $hc[$code] = sprintf( "%%%02X", $code ) unless defined $hc[$code];
        $hc[$code];
    }ge;
    return substr( $text, 0, 50 );
}
1968
#
# make_URL_href - generate HTML href from URL
# Special treatment for CGI queries.
#
# Argument:  the URL
# Returns:   an <A HREF="..."> element pointing at the URL.  For an http:
#            URL with a query string, the query is kept in the HREF but
#            left out of the visible link text.
#
sub make_URL_href($){
    my( $url ) = @_;

    # $2 is undefined when the URL carries no query string; substitute
    # an empty string so the replacement does not warn under -w.
    my $is_http = $url =~
        s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)?$}
         { '<A HREF="' . $1 . ( defined( $2 ) ? $2 : '' ) . '">' . $1 . '</A>' }ie;

    # everything else is linked as it stands
    $url = "<A HREF=\"$url\">$url</A>" unless $is_http;

    return $url;
}
1981
1982 1;