lib/Pod/Html.pm

   1 package Pod::Html;
   2
   3 use Pod::Functions;
   4 use Getopt::Long;       # package for handling command-line parameters
   5 use File::Spec::Unix;
   6 require Exporter;
   7 use vars qw($VERSION);
   8 $VERSION = 1.02;
   9 @ISA = Exporter;
  10 @EXPORT = qw(pod2html htmlify);
  11 use Cwd;
  12
  13 use Carp;
  14
  15 use locale;     # make \w work right in non-ASCII lands
  16
  17 use strict;
  18
  19 use Config;
  20
  21 =head1 NAME
  22
  23 Pod::Html - module to convert pod files to HTML
  24
  25 =head1 SYNOPSIS
  26
  27     use Pod::Html;
  28     pod2html([options]);
  29
  30 =head1 DESCRIPTION
  31
  32 Converts files from pod format (see L<perlpod>) to HTML format.  It
  33 can automatically generate indexes and cross-references, and it keeps
  34 a cache of things it knows how to cross-reference.
  35
  36 =head1 ARGUMENTS
  37
  38 Pod::Html takes the following arguments:
  39
  40 =over 4
  41
  42 =item help
  43
  44     --help
  45
  46 Displays the usage message.
  47
  48 =item htmldir
  49
  50     --htmldir=name
  51
  52 Sets the directory in which the resulting HTML file is placed.  This
  53 is used to generate relative links to other files. Not passing this
  54 causes all links to be absolute, since this is the value that tells
  55 Pod::Html the root of the documentation tree.
  56
  57 =item htmlroot
  58
  59     --htmlroot=name
  60
  61 Sets the base URL for the HTML files.  When cross-references are made,
  62 the HTML root is prepended to the URL.
  63
  64 =item infile
  65
  66     --infile=name
  67
  68 Specify the pod file to convert.  Input is taken from STDIN if no
  69 infile is specified.
  70
  71 =item outfile
  72
  73     --outfile=name
  74
  75 Specify the HTML file to create.  Output goes to STDOUT if no outfile
  76 is specified.
  77
  78 =item podroot
  79
  80     --podroot=name
  81
  82 Specify the base directory for finding library pods.
  83
  84 =item podpath
  85
  86     --podpath=name:...:name
  87
  88 Specify which subdirectories of the podroot contain pod files whose
  89 HTML converted forms can be linked-to in cross-references.
  90
  91 =item libpods
  92
  93     --libpods=name:...:name
  94
  95 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
  96
  97 =item netscape
  98
  99     --netscape
 100
 101 Use Netscape HTML directives when applicable.
 102
 103 =item nonetscape
 104
 105     --nonetscape
 106
 107 Do not use Netscape HTML directives (default).
 108
 109 =item index
 110
 111     --index
 112
 113 Generate an index at the top of the HTML file (default behaviour).
 114
 115 =item noindex
 116
 117     --noindex
 118
 119 Do not generate an index at the top of the HTML file.
 120
 121
 122 =item recurse
 123
 124     --recurse
 125
 126 Recurse into subdirectories specified in podpath (default behaviour).
 127
 128 =item norecurse
 129
 130     --norecurse
 131
 132 Do not recurse into subdirectories specified in podpath.
 133
 134 =item title
 135
 136     --title=title
 137
 138 Specify the title of the resulting HTML file.
 139
 140 =item css
 141
 142     --css=stylesheet
 143
 144 Specify the URL of a cascading style sheet.
 145
 146 =item verbose
 147
 148     --verbose
 149
 150 Display progress messages.
 151
 152 =item quiet
 153
 154     --quiet
 155
 156 Don't display I<mostly harmless> warning messages.
 157
 158 =back
 159
 160 =head1 EXAMPLE
 161
 162     pod2html("pod2html",
 163              "--podpath=lib:ext:pod:vms",
 164              "--podroot=/usr/src/perl",
 165              "--htmlroot=/perl/nmanual",
 166              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
 167              "--recurse",
 168              "--infile=foo.pod",
 169              "--outfile=/perl/nmanual/foo.html");
 170
 171 =head1 ENVIRONMENT
 172
 173 Uses $Config{pod2html} to setup default options.
 174
 175 =head1 AUTHOR
 176
 177 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 178
 179 =head1 BUGS
 180
 181 Has trouble with C<> etc in = commands.
 182
 183 =head1 SEE ALSO
 184
 185 L<perlpod>
 186
 187 =head1 COPYRIGHT
 188
 189 This program is distributed under the Artistic License.
 190
 191 =cut
 192
 193 my $dircache = "pod2html.d~~";
 194 my $itemcache = "pod2html.i~~";
 195
 196 my @begin_stack = ();           # begin/end stack
 197
 198 my @libpods = ();               # files to search for links from C<> directives
 199 my $htmlroot = "/";             # http-server base directory from which all
 200                                 #   relative paths in $podpath stem.
 201 my $htmldir = "";               # The directory to which the html pages
 202                                 # will (eventually) be written.
 203 my $htmlfile = "";              # write to stdout by default
 204 my $htmlfileurl = "" ;          # The url that other files would use to
 205                                 # refer to this file.  This is only used
 206                                 # to make relative urls that point to
 207                                 # other files.
 208 my $podfile = "";               # read from stdin by default
 209 my @podpath = ();               # list of directories containing library pods.
 210 my $podroot = ".";              # filesystem base directory from which all
 211                                 #   relative paths in $podpath stem.
 212 my $css = '';                   # Cascading style sheet
 213 my $recurse = 1;                # recurse on subdirectories in $podpath.
 214 my $quiet = 0;                  # not quiet by default
 215 my $verbose = 0;                # not verbose by default
 216 my $doindex = 1;                # non-zero if we should generate an index
 217 my $listlevel = 0;              # current list depth
 218 my @listitem = ();              # stack of HTML commands to use when a =item is
 219                                 #   encountered.  the top of the stack is the
 220                                 #   current list.
 221 my @listdata = ();              # similar to @listitem, but for the text after
 222                                 #   an =item
 223 my @listend = ();               # similar to @listitem, but the text to use to
 224                                 #   end the list.
 225 my $ignore = 1;                 # whether or not to format text.  we don't
 226                                 #   format text until we hit our first pod
 227                                 #   directive.
 228
 229 my %items_named = ();           # for the multiples of the same item in perlfunc
 230 my @items_seen = ();
 231 my $netscape = 0;               # whether or not to use netscape directives.
 232 my $title;                      # title to give the pod(s)
 233 my $header = 0;                 # produce block header/footer
 234 my $top = 1;                    # true if we are at the top of the doc.  used
 235                                 #   to prevent the first <HR> directive.
 236 my $paragraph;                  # which paragraph we're processing (used
 237                                 #   for error messages)
 238 my %pages = ();                 # associative array used to find the location
 239                                 #   of pages referenced by L<> links.
 240 my %sections = ();              # sections within this page
 241 my %items = ();                 # associative array used to find the location
 242                                 #   of =item directives referenced by C<> links
 243 my $Is83;                       # is dos with short filenames (8.3)
 244
 245 sub init_globals {
 246 $dircache = "pod2html.d~~";
 247 $itemcache = "pod2html.i~~";
 248
 249 @begin_stack = ();              # begin/end stack
 250
 251 @libpods = ();          # files to search for links from C<> directives
 252 $htmlroot = "/";                # http-server base directory from which all
 253                                 #   relative paths in $podpath stem.
 254 $htmlfile = "";         # write to stdout by default
 255 $podfile = "";          # read from stdin by default
 256 @podpath = ();          # list of directories containing library pods.
 257 $podroot = ".";         # filesystem base directory from which all
 258                                 #   relative paths in $podpath stem.
 259 $css = '';                   # Cascading style sheet
 260 $recurse = 1;           # recurse on subdirectories in $podpath.
 261 $quiet = 0;             # not quiet by default
 262 $verbose = 0;           # not verbose by default
 263 $doindex = 1;                   # non-zero if we should generate an index
 264 $listlevel = 0;         # current list depth
 265 @listitem = ();         # stack of HTML commands to use when a =item is
 266                                 #   encountered.  the top of the stack is the
 267                                 #   current list.
 268 @listdata = ();         # similar to @listitem, but for the text after
 269                                 #   an =item
 270 @listend = ();          # similar to @listitem, but the text to use to
 271                                 #   end the list.
 272 $ignore = 1;                    # whether or not to format text.  we don't
 273                                 #   format text until we hit our first pod
 274                                 #   directive.
 275
 276 @items_seen = ();
 277 %items_named = ();
 278 $netscape = 0;          # whether or not to use netscape directives.
 279 $header = 0;                    # produce block header/footer
 280 $title = '';                    # title to give the pod(s)
 281 $top = 1;                       # true if we are at the top of the doc.  used
 282                                 #   to prevent the first <HR> directive.
 283 $paragraph = '';                        # which paragraph we're processing (used
 284                                 #   for error messages)
 285 %sections = ();         # sections within this page
 286
 287 # These are not reinitialised here but are kept as a cache.
 288 # See get_cache and related cache management code.
 289 #%pages = ();                   # associative array used to find the location
 290                                 #   of pages referenced by L<> links.
 291 #%items = ();                   # associative array used to find the location
 292                                 #   of =item directives referenced by C<> links
 293 $Is83=$^O eq 'dos';
 294 }
 295
 296 sub pod2html {
 297     local(@ARGV) = @_;
 298     local($/);
 299     local $_;
 300
 301     init_globals();
 302
 303     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 304
 305     # cache of %pages and %items from last time we ran pod2html
 306
 307     #undef $opt_help if defined $opt_help;
 308
 309     # parse the command-line parameters
 310     parse_command_line();
 311
 312     # set some variables to their default values if necessary
 313     local *POD;
 314     unless (@ARGV && $ARGV[0]) {
 315         $podfile  = "-" unless $podfile;        # stdin
 316         open(POD, "<$podfile")
 317                 || die "$0: cannot open $podfile file for input: $!\n";
 318     } else {
 319         $podfile = $ARGV[0];  # XXX: might be more filenames
 320         *POD = *ARGV;
 321     }
 322     $htmlfile = "-" unless $htmlfile;   # stdout
 323     $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
 324     $htmldir =~ s#/$## ;                # so we don't get a //
 325     if (  $htmlroot eq ''
 326        && defined( $htmldir )
 327        && $htmldir ne ''
 328        && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
 329        )
 330     {
 331         # Set the 'base' url for this file, so that we can use it
 332         # as the location from which to calculate relative links
 333         # to other files. If this is '', then absolute links will
 334         # be used throughout.
 335         $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
 336     }
 337
 338     # read the pod a paragraph at a time
 339     warn "Scanning for sections in input file(s)\n" if $verbose;
 340     $/ = "";
 341     my @poddata  = <POD>;
 342     close(POD);
 343
 344     # scan the pod for =head[1-6] directives and build an index
 345     my $index = scan_headings(\%sections, @poddata);
 346
 347     unless($index) {
 348         warn "No pod in $podfile\n" if $verbose;
 349         return;
 350     }
 351
 352     # open the output file
 353     open(HTML, ">$htmlfile")
 354             || die "$0: cannot open $htmlfile file for output: $!\n";
 355
 356     # put a title in the HTML file if one wasn't specified
 357     if ($title eq '') {
 358         TITLE_SEARCH: {
 359             for (my $i = 0; $i < @poddata; $i++) {
 360                 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 361                     for my $para ( @poddata[$i, $i+1] ) {
 362                         last TITLE_SEARCH
 363                             if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
 364                     }
 365                 }
 366
 367             }
 368         }
 369     }
 370     if (!$title and $podfile =~ /\.pod$/) {
 371         # probably a split pod so take first =head[12] as title
 372         for (my $i = 0; $i < @poddata; $i++) {
 373             last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 374         }
 375         warn "adopted '$title' as title for $podfile\n"
 376             if $verbose and $title;
 377     }
 378     if ($title) {
 379         $title =~ s/\s*\(.*\)//;
 380     } else {
 381         warn "$0: no title for $podfile" unless $quiet;
 382         $podfile =~ /^(.*)(\.[^.\/]+)?$/;
 383         $title = ($podfile eq "-" ? 'No Title' : $1);
 384         warn "using $title" if $verbose;
 385     }
 386     my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
 387     $csslink =~ s,\\,/,g;
 388     $csslink =~ s,(/.):,$1|,;
 389
 390     my $block = $header ? <<END_OF_BLOCK : '';
 391 <TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
 392 <TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
 393 <FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
 394 </TD></TR>
 395 </TABLE>
 396 END_OF_BLOCK
 397
 398     print HTML <<END_OF_HEAD;
 399 <HTML>
 400 <HEAD>
 401 <TITLE>$title</TITLE>$csslink
 402 <LINK REV="made" HREF="mailto:$Config{perladmin}">
 403 </HEAD>
 404
 405 <BODY>
 406 $block
 407 END_OF_HEAD
 408
 409     # load/reload/validate/cache %pages and %items
 410     get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
 411
 412     # scan the pod for =item directives
 413     scan_items("", \%items, @poddata);
 414
 415     # put an index at the top of the file.  note, if $doindex is 0 we
 416     # still generate an index, but surround it with an html comment.
 417     # that way some other program can extract it if desired.
 418     $index =~ s/--+/-/g;
 419     print HTML "<!-- INDEX BEGIN -->\n";
 420     print HTML "<!--\n" unless $doindex;
 421     print HTML $index;
 422     print HTML "-->\n" unless $doindex;
 423     print HTML "<!-- INDEX END -->\n\n";
 424     print HTML "<HR>\n" if $doindex;
 425
 426     # now convert this file
 427     warn "Converting input file\n" if $verbose;
 428     foreach my $i (0..$#poddata) {
 429         $_ = $poddata[$i];
 430         $paragraph = $i+1;
 431         if (/^(=.*)/s) {        # is it a pod directive?
 432             $ignore = 0;
 433             $_ = $1;
 434             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 435                 process_begin($1, $2);
 436             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 437                 process_end($1, $2);
 438             } elsif (/^=cut/) {                 # =cut
 439                 process_cut();
 440             } elsif (/^=pod/) {                 # =pod
 441                 process_pod();
 442             } else {
 443                 next if @begin_stack && $begin_stack[-1] ne 'html';
 444
 445                 if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
 446                     process_head($1, $2);
 447                 } elsif (/^=item\s*(.*\S)/sm) { # =item text
 448                     process_item($1);
 449                 } elsif (/^=over\s*(.*)/) {             # =over N
 450                     process_over();
 451                 } elsif (/^=back/) {            # =back
 452                     process_back();
 453                 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
 454                     process_for($1,$2);
 455                 } else {
 456                     /^=(\S*)\s*/;
 457                     warn "$0: $podfile: unknown pod directive '$1' in "
 458                        . "paragraph $paragraph.  ignoring.\n";
 459                 }
 460             }
 461             $top = 0;
 462         }
 463         else {
 464             next if $ignore;
 465             next if @begin_stack && $begin_stack[-1] ne 'html';
 466             my $text = $_;
 467             process_text(\$text, 1);
 468             print HTML "<P>\n$text</P>\n";
 469         }
 470     }
 471
 472     # finish off any pending directives
 473     finish_list();
 474     print HTML <<END_OF_TAIL;
 475 $block
 476 </BODY>
 477
 478 </HTML>
 479 END_OF_TAIL
 480
 481     # close the html file
 482     close(HTML);
 483
 484     warn "Finished\n" if $verbose;
 485 }
 486
 487 ##############################################################################
 488
 489 my $usage;                      # see below
 490 sub usage {
 491     my $podfile = shift;
 492     warn "$0: $podfile: @_\n" if @_;
 493     die $usage;
 494 }
 495
 496 $usage =<<END_OF_USAGE;
 497 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 498            --podpath=<name>:...:<name> --podroot=<name>
 499            --libpods=<name>:...:<name> --recurse --verbose --index
 500            --netscape --norecurse --noindex
 501
 502   --flush      - flushes the item and directory caches.
 503   --help       - prints this message.
 504   --htmlroot   - http-server base directory from which all relative paths
 505                  in podpath stem (default is /).
 506   --index      - generate an index at the top of the resulting html
 507                  (default).
 508   --infile     - filename for the pod to convert (input taken from stdin
 509                  by default).
 510   --libpods    - colon-separated list of pages to search for =item pod
 511                  directives in as targets of C<> and implicit links (empty
 512                  by default).  note, these are not filenames, but rather
 513                  page names like those that appear in L<> links.
 514   --netscape   - will use netscape html directives when applicable.
 515   --nonetscape - will not use netscape directives (default).
 516   --outfile    - filename for the resulting html file (output sent to
 517                  stdout by default).
 518   --podpath    - colon-separated list of directories containing library
 519                  pods.  empty by default.
 520   --podroot    - filesystem base directory from which all relative paths
 521                  in podpath stem (default is .).
 522   --noindex    - don't generate an index at the top of the resulting html.
 523   --norecurse  - don't recurse on those subdirectories listed in podpath.
 524   --recurse    - recurse on those subdirectories listed in podpath
 525                  (default behavior).
 526   --title      - title that will appear in resulting html file.
 527   --header     - produce block header/footer
 528   --css        - stylesheet URL
 529   --verbose    - self-explanatory
 530   --quiet      - supress some benign warning messages
 531
 532 END_OF_USAGE
 533
 534 sub parse_command_line {
 535     my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
 536     unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
 537     my $result = GetOptions(
 538                             'flush'      => \$opt_flush,
 539                             'help'       => \$opt_help,
 540                             'htmldir=s'  => \$opt_htmldir,
 541                             'htmlroot=s' => \$opt_htmlroot,
 542                             'index!'     => \$opt_index,
 543                             'infile=s'   => \$opt_infile,
 544                             'libpods=s'  => \$opt_libpods,
 545                             'netscape!'  => \$opt_netscape,
 546                             'outfile=s'  => \$opt_outfile,
 547                             'podpath=s'  => \$opt_podpath,
 548                             'podroot=s'  => \$opt_podroot,
 549                             'norecurse'  => \$opt_norecurse,
 550                             'recurse!'   => \$opt_recurse,
 551                             'title=s'    => \$opt_title,
 552                             'header'     => \$opt_header,
 553                             'css=s'      => \$opt_css,
 554                             'verbose'    => \$opt_verbose,
 555                             'quiet'      => \$opt_quiet,
 556                            );
 557     usage("-", "invalid parameters") if not $result;
 558
 559     usage("-") if defined $opt_help;    # see if the user asked for help
 560     $opt_help = "";                     # just to make -w shut-up.
 561
 562     $podfile  = $opt_infile if defined $opt_infile;
 563     $htmlfile = $opt_outfile if defined $opt_outfile;
 564     $htmldir  = $opt_htmldir if defined $opt_outfile;
 565
 566     @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 567     @libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 568
 569     warn "Flushing item and directory caches\n"
 570         if $opt_verbose && defined $opt_flush;
 571     unlink($dircache, $itemcache) if defined $opt_flush;
 572
 573     $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 574     $podroot  = $opt_podroot if defined $opt_podroot;
 575
 576     $doindex  = $opt_index if defined $opt_index;
 577     $recurse  = $opt_recurse if defined $opt_recurse;
 578     $title    = $opt_title if defined $opt_title;
 579     $header   = defined $opt_header ? 1 : 0;
 580     $css      = $opt_css if defined $opt_css;
 581     $verbose  = defined $opt_verbose ? 1 : 0;
 582     $quiet    = defined $opt_quiet ? 1 : 0;
 583     $netscape = $opt_netscape if defined $opt_netscape;
 584 }
 585
 586
 587 my $saved_cache_key;
 588
 589 sub get_cache {
 590     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 591     my @cache_key_args = @_;
 592
 593     # A first-level cache:
 594     # Don't bother reading the cache files if they still apply
 595     # and haven't changed since we last read them.
 596
 597     my $this_cache_key = cache_key(@cache_key_args);
 598
 599     return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
 600
 601     # load the cache of %pages and %items if possible.  $tests will be
 602     # non-zero if successful.
 603     my $tests = 0;
 604     if (-f $dircache && -f $itemcache) {
 605         warn "scanning for item cache\n" if $verbose;
 606         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 607     }
 608
 609     # if we didn't succeed in loading the cache then we must (re)build
 610     #  %pages and %items.
 611     if (!$tests) {
 612         warn "scanning directories in pod-path\n" if $verbose;
 613         scan_podpath($podroot, $recurse, 0);
 614     }
 615     $saved_cache_key = cache_key(@cache_key_args);
 616 }
 617
 618 sub cache_key {
 619     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 620     return join('!', $dircache, $itemcache, $recurse,
 621         @$podpath, $podroot, stat($dircache), stat($itemcache));
 622 }
 623
 624 #
 625 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 626 #  are valid caches of %pages and %items.  if they are valid then it loads
 627 #  them and returns a non-zero value.
 628 #
 629
 630 sub load_cache {
 631     my($dircache, $itemcache, $podpath, $podroot) = @_;
 632     my($tests);
 633     local $_;
 634
 635     $tests = 0;
 636
 637     open(CACHE, "<$itemcache") ||
 638         die "$0: error opening $itemcache for reading: $!\n";
 639     $/ = "\n";
 640
 641     # is it the same podpath?
 642     $_ = <CACHE>;
 643     chomp($_);
 644     $tests++ if (join(":", @$podpath) eq $_);
 645
 646     # is it the same podroot?
 647     $_ = <CACHE>;
 648     chomp($_);
 649     $tests++ if ($podroot eq $_);
 650
 651     # load the cache if its good
 652     if ($tests != 2) {
 653         close(CACHE);
 654         return 0;
 655     }
 656
 657     warn "loading item cache\n" if $verbose;
 658     while (<CACHE>) {
 659         /(.*?) (.*)$/;
 660         $items{$1} = $2;
 661     }
 662     close(CACHE);
 663
 664     warn "scanning for directory cache\n" if $verbose;
 665     open(CACHE, "<$dircache") ||
 666         die "$0: error opening $dircache for reading: $!\n";
 667     $/ = "\n";
 668     $tests = 0;
 669
 670     # is it the same podpath?
 671     $_ = <CACHE>;
 672     chomp($_);
 673     $tests++ if (join(":", @$podpath) eq $_);
 674
 675     # is it the same podroot?
 676     $_ = <CACHE>;
 677     chomp($_);
 678     $tests++ if ($podroot eq $_);
 679
 680     # load the cache if its good
 681     if ($tests != 2) {
 682         close(CACHE);
 683         return 0;
 684     }
 685
 686     warn "loading directory cache\n" if $verbose;
 687     while (<CACHE>) {
 688         /(.*?) (.*)$/;
 689         $pages{$1} = $2;
 690     }
 691
 692     close(CACHE);
 693
 694     return 1;
 695 }
 696
 697 #
 698 # scan_podpath - scans the directories specified in @podpath for directories,
 699 #  .pod files, and .pm files.  it also scans the pod files specified in
 700 #  @libpods for =item directives.
 701 #
 702 sub scan_podpath {
 703     my($podroot, $recurse, $append) = @_;
 704     my($pwd, $dir);
 705     my($libpod, $dirname, $pod, @files, @poddata);
 706
 707     unless($append) {
 708         %items = ();
 709         %pages = ();
 710     }
 711
 712     # scan each directory listed in @podpath
 713     $pwd = getcwd();
 714     chdir($podroot)
 715         || die "$0: error changing to directory $podroot: $!\n";
 716     foreach $dir (@podpath) {
 717         scan_dir($dir, $recurse);
 718     }
 719
 720     # scan the pods listed in @libpods for =item directives
 721     foreach $libpod (@libpods) {
 722         # if the page isn't defined then we won't know where to find it
 723         # on the system.
 724         next unless defined $pages{$libpod} && $pages{$libpod};
 725
 726         # if there is a directory then use the .pod and .pm files within it.
 727         # NOTE: Only finds the first so-named directory in the tree.
 728 #       if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 729         if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
 730             #  find all the .pod and .pm files within the directory
 731             $dirname = $1;
 732             opendir(DIR, $dirname) ||
 733                 die "$0: error opening directory $dirname: $!\n";
 734             @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
 735             closedir(DIR);
 736
 737             # scan each .pod and .pm file for =item directives
 738             foreach $pod (@files) {
 739                 open(POD, "<$dirname/$pod") ||
 740                     die "$0: error opening $dirname/$pod for input: $!\n";
 741                 @poddata = <POD>;
 742                 close(POD);
 743
 744                 scan_items("$dirname/$pod", @poddata);
 745             }
 746
 747             # use the names of files as =item directives too.
 748             foreach $pod (@files) {
 749                 $pod =~ /^(.*)(\.pod|\.pm)$/;
 750                 $items{$1} = "$dirname/$1.html" if $1;
 751             }
 752         } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
 753                  $pages{$libpod} =~ /([^:]*\.pm):/) {
 754             # scan the .pod or .pm file for =item directives
 755             $pod = $1;
 756             open(POD, "<$pod") ||
 757                 die "$0: error opening $pod for input: $!\n";
 758             @poddata = <POD>;
 759             close(POD);
 760
 761             scan_items("$pod", @poddata);
 762         } else {
 763             warn "$0: shouldn't be here (line ".__LINE__."\n";
 764         }
 765     }
 766     @poddata = ();      # clean-up a bit
 767
 768     chdir($pwd)
 769         || die "$0: error changing to directory $pwd: $!\n";
 770
 771     # cache the item list for later use
 772     warn "caching items for later use\n" if $verbose;
 773     open(CACHE, ">$itemcache") ||
 774         die "$0: error open $itemcache for writing: $!\n";
 775
 776     print CACHE join(":", @podpath) . "\n$podroot\n";
 777     foreach my $key (keys %items) {
 778         print CACHE "$key $items{$key}\n";
 779     }
 780
 781     close(CACHE);
 782
 783     # cache the directory list for later use
 784     warn "caching directories for later use\n" if $verbose;
 785     open(CACHE, ">$dircache") ||
 786         die "$0: error open $dircache for writing: $!\n";
 787
 788     print CACHE join(":", @podpath) . "\n$podroot\n";
 789     foreach my $key (keys %pages) {
 790         print CACHE "$key $pages{$key}\n";
 791     }
 792
 793     close(CACHE);
 794 }
 795
 796 #
 797 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 798 #  files, and .pm files.  notes those that it finds.  this information will
 799 #  be used later in order to figure out where the pages specified in L<>
 800 #  links are on the filesystem.
 801 #
 802 sub scan_dir {
 803     my($dir, $recurse) = @_;
 804     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 805     local $_;
 806
 807     @subdirs = ();
 808     @pods = ();
 809
 810     opendir(DIR, $dir) ||
 811         die "$0: error opening directory $dir: $!\n";
 812     while (defined($_ = readdir(DIR))) {
 813         if (-d "$dir/$_" && $_ ne "." && $_ ne "..") {      # directory
 814             $pages{$_}  = "" unless defined $pages{$_};
 815             $pages{$_} .= "$dir/$_:";
 816             push(@subdirs, $_);
 817         } elsif (/\.pod$/) {                                # .pod
 818             s/\.pod$//;
 819             $pages{$_}  = "" unless defined $pages{$_};
 820             $pages{$_} .= "$dir/$_.pod:";
 821             push(@pods, "$dir/$_.pod");
 822         } elsif (/\.pm$/) {                                 # .pm
 823             s/\.pm$//;
 824             $pages{$_}  = "" unless defined $pages{$_};
 825             $pages{$_} .= "$dir/$_.pm:";
 826             push(@pods, "$dir/$_.pm");
 827         }
 828     }
 829     closedir(DIR);
 830
 831     # recurse on the subdirectories if necessary
 832     if ($recurse) {
 833         foreach my $subdir (@subdirs) {
 834             scan_dir("$dir/$subdir", $recurse);
 835         }
 836     }
 837 }
 838
 839 #
 840 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
 841 #  build an index.
 842 #
 843 sub scan_headings {
 844     my($sections, @data) = @_;
 845     my($tag, $which_head, $title, $listdepth, $index);
 846
 847     # here we need      local $ignore = 0;
 848     #  unfortunately, we can't have it, because $ignore is lexical
 849     $ignore = 0;
 850
 851     $listdepth = 0;
 852     $index = "";
 853
 854     # scan for =head directives, note their name, and build an index
 855     #  pointing to each of them.
 856     foreach my $line (@data) {
 857         if ($line =~ /^=(head)([1-6])\s+(.*)/) {
 858             ($tag,$which_head, $title) = ($1,$2,$3);
 859             chomp($title);
 860             $$sections{htmlify(0,$title)} = 1;
 861
 862             while ($which_head != $listdepth) {
 863                 if ($which_head > $listdepth) {
 864                     $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
 865                     $listdepth++;
 866                 } elsif ($which_head < $listdepth) {
 867                     $listdepth--;
 868                     $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 869                 }
 870             }
 871
 872             $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
 873                       "<A HREF=\"#" . htmlify(0,$title) . "\">" .
 874                       html_escape(process_text(\$title, 0)) . "</A></LI>";
 875         }
 876     }
 877
 878     # finish off the lists
 879     while ($listdepth--) {
 880         $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 881     }
 882
 883     # get rid of bogus lists
 884     $index =~ s,\t*<UL>\s*</UL>\n,,g;
 885
 886     $ignore = 1;        # restore old value;
 887
 888     return $index;
 889 }
 890
 891 #
 892 # scan_items - scans the pod specified by $pod for =item directives.  we
 893 #  will use this information later on in resolving C<> links.
 894 #
 895 sub scan_items {
 896     my($pod, @poddata) = @_;
 897     my($i, $item);
 898     local $_;
 899
 900     $pod =~ s/\.pod$//;
 901     $pod .= ".html" if $pod;
 902
 903     foreach $i (0..$#poddata) {
 904         $_ = $poddata[$i];
 905
 906         # remove any formatting instructions
 907         s,[A-Z]<([^<>]*)>,$1,g;
 908
 909         # figure out what kind of item it is and get the first word of
 910         #  it's name.
 911         if (/^=item\s+(\w*)\s*.*$/s) {
 912             if ($1 eq "*") {            # bullet list
 913                 /\A=item\s+\*\s*(.*?)\s*\Z/s;
 914                 $item = $1;
 915             } elsif ($1 =~ /^\d+/) {    # numbered list
 916                 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
 917                 $item = $1;
 918             } else {
 919 #               /\A=item\s+(.*?)\s*\Z/s;
 920                 /\A=item\s+(\w*)/s;
 921                 $item = $1;
 922             }
 923
 924             $items{$item} = "$pod" if $item;
 925         }
 926     }
 927 }
 928
 929 #
 930 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
 931 #
 932 sub process_head {
 933     my($tag, $heading) = @_;
 934     my $firstword;
 935
 936     # figure out the level of the =head
 937     $tag =~ /head([1-6])/;
 938     my $level = $1;
 939
 940     # can't have a heading full of spaces and speechmarks and so on
 941     $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
 942
 943     print HTML "<P>\n" unless $listlevel;
 944     print HTML "<HR>\n" unless $listlevel || $top;
 945     print HTML "<H$level>"; # unless $listlevel;
 946     #print HTML "<H$level>" unless $listlevel;
 947     my $convert = $heading; process_text(\$convert, 0);
 948     $convert = html_escape($convert);
 949     print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
 950     print HTML "</H$level>"; # unless $listlevel;
 951     print HTML "\n";
 952 }
 953
 954 #
 955 # process_item - convert a pod item tag and convert it to HTML format.
 956 #
 957 sub process_item {
 958     my $text = $_[0];
 959     my($i, $quote, $name);
 960
 961     my $need_preamble = 0;
 962     my $this_entry;
 963
 964
 965     # lots of documents start a list without doing an =over.  this is
 966     # bad!  but, the proper thing to do seems to be to just assume
 967     # they did do an =over.  so warn them once and then continue.
 968     warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n"
 969         unless $listlevel;
 970     process_over() unless $listlevel;
 971
 972     return unless $listlevel;
 973
 974     # remove formatting instructions from the text
 975     1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
 976     pre_escape(\$text);
 977
 978     $need_preamble = $items_seen[$listlevel]++ == 0;
 979
 980     # check if this is the first =item after an =over
 981     $i = $listlevel - 1;
 982     my $need_new = $listlevel >= @listitem;
 983
 984     if ($text =~ /\A\*/) {              # bullet
 985
 986         if ($need_preamble) {
 987             push(@listend,  "</UL>");
 988             print HTML "<UL>\n";
 989         }
 990
 991         print HTML '<LI>';
 992         if ($text =~ /\A\*\s*(.+)\Z/s) {
 993             print HTML '<STRONG>';
 994             if ($items_named{$1}++) {
 995                 print HTML html_escape($1);
 996             } else {
 997                 my $name = 'item_' . htmlify(1,$1);
 998                 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
 999             }
1000             print HTML '</STRONG>';
1001         }
1002
1003     } elsif ($text =~ /\A[\d#]+/) {     # numbered list
1004
1005         if ($need_preamble) {
1006             push(@listend,  "</OL>");
1007             print HTML "<OL>\n";
1008         }
1009
1010         print HTML '<LI>';
1011         if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
1012             print HTML '<STRONG>';
1013             if ($items_named{$1}++) {
1014                 print HTML html_escape($1);
1015             } else {
1016                 my $name = 'item_' . htmlify(0,$1);
1017                 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
1018             }
1019             print HTML '</STRONG>';
1020         }
1021
1022     } else {                    # all others
1023
1024         if ($need_preamble) {
1025             push(@listend,  '</DL>');
1026             print HTML "<DL>\n";
1027         }
1028
1029         print HTML '<DT>';
1030         if ($text =~ /(\S+)/) {
1031             print HTML '<STRONG>';
1032             if ($items_named{$1}++) {
1033                 print HTML html_escape($text);
1034             } else {
1035                 my $name = 'item_' . htmlify(1,$text);
1036                 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
1037             }
1038             print HTML '</STRONG>';
1039         }
1040        print HTML '<DD>';
1041     }
1042
1043     print HTML "\n";
1044 }
1045
1046 #
1047 # process_over - process a pod over tag and start a corresponding HTML
1048 # list.
1049 #
1050 sub process_over {
1051     # start a new list
1052     $listlevel++;
1053 }
1054
1055 #
1056 # process_back - process a pod back tag and convert it to HTML format.
1057 #
1058 sub process_back {
1059     warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n"
1060         unless $listlevel;
1061     return unless $listlevel;
1062
1063     # close off the list.  note, I check to see if $listend[$listlevel] is
1064     # defined because an =item directive may have never appeared and thus
1065     # $listend[$listlevel] may have never been initialized.
1066     $listlevel--;
1067     print HTML $listend[$listlevel] if defined $listend[$listlevel];
1068     print HTML "\n";
1069
1070     # don't need the corresponding perl code anymore
1071     pop(@listitem);
1072     pop(@listdata);
1073     pop(@listend);
1074
1075     pop(@items_seen);
1076 }
1077
1078 #
1079 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
1080 #
1081 sub process_cut {
1082     $ignore = 1;
1083 }
1084
1085 #
1086 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1087 # corresponding cut.
1088 #
1089 sub process_pod {
1090     # no need to set $ignore to 0 cause the main loop did it
1091 }
1092
1093 #
1094 # process_for - process a =for pod tag.  if it's for html, split
1095 # it out verbatim, if illustration, center it, otherwise ignore it.
1096 #
1097 sub process_for {
1098     my($whom, $text) = @_;
1099     if ( $whom =~ /^(pod2)?html$/i) {
1100         print HTML $text;
1101     } elsif ($whom =~ /^illustration$/i) {
1102         1 while chomp $text;
1103         for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1104           $text .= $ext, last if -r "$text$ext";
1105         }
1106         print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1107     }
1108 }
1109
1110 #
1111 # process_begin - process a =begin pod tag.  this pushes
1112 # whom we're beginning on the begin stack.  if there's a
1113 # begin stack, we only print if it us.
1114 #
1115 sub process_begin {
1116     my($whom, $text) = @_;
1117     $whom = lc($whom);
1118     push (@begin_stack, $whom);
1119     if ( $whom =~ /^(pod2)?html$/) {
1120         print HTML $text if $text;
1121     }
1122 }
1123
1124 #
1125 # process_end - process a =end pod tag.  pop the
1126 # begin stack.  die if we're mismatched.
1127 #
1128 sub process_end {
1129     my($whom, $text) = @_;
1130     $whom = lc($whom);
1131     if ($begin_stack[-1] ne $whom ) {
1132         die "Unmatched begin/end at chunk $paragraph\n"
1133     }
1134     pop @begin_stack;
1135 }
1136
1137 #
1138 # process_text - handles plaintext that appears in the input pod file.
1139 # there may be pod commands embedded within the text so those must be
1140 # converted to html commands.
1141 #
1142 sub process_text {
1143     my($text, $escapeQuotes) = @_;
1144     my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1145     my($podcommand, $params, $tag, $quote);
1146
1147     return if $ignore;
1148
1149     $quote  = 0;                # status of double-quote conversion
1150     $result = "";
1151     $rest = $$text;
1152
1153     if ($rest =~ /^\s+/) {      # preformatted text, no pod directives
1154         $rest =~ s/\n+\Z//;
1155         $rest =~ s#.*#
1156             my $line = $&;
1157             1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1158             $line;
1159         #eg;
1160
1161         $rest   =~ s/&/&amp;/g;
1162         $rest   =~ s/</&lt;/g;
1163         $rest   =~ s/>/&gt;/g;
1164         $rest   =~ s/"/&quot;/g;
1165
1166         # try and create links for all occurrences of perl.* within
1167         # the preformatted text.
1168         $rest =~ s{
1169                     (\s*)(perl\w+)
1170                   }{
1171                     if (defined $pages{$2}) {   # is a link
1172                         qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1173                     } elsif (defined $pages{dosify($2)}) {      # is a link
1174                         qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1175                     } else {
1176                         "$1$2";
1177                     }
1178                   }xeg;
1179 #       $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1180         $rest =~ s{
1181                     (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1182                   }{
1183                     my $url ;
1184                     if ( $htmlfileurl ne '' ) {
1185                         # Here, we take advantage of the knowledge
1186                         # that $htmlfileurl ne '' implies $htmlroot eq ''.
1187                         # Since $htmlroot eq '', we need to prepend $htmldir
1188                         # on the fron of the link to get the absolute path
1189                         # of the link's target. We check for a leading '/'
1190                         # to avoid corrupting links that are #, file:, etc.
1191                         my $old_url = $3 ;
1192                         $old_url = "$htmldir$old_url"
1193                             if ( $old_url =~ m{^\/} ) ;
1194                         $url = relativize_url( "$old_url.html", $htmlfileurl );
1195 # print( "  a: [$old_url.html,$htmlfileurl,$url]\n" ) ;
1196                     }
1197                     else {
1198                         $url = "$3.html" ;
1199                     }
1200                     "$1$url" ;
1201                   }xeg;
1202
1203   # Look for embedded URLs and make them in to links.  We don't
1204   # relativize them since they are best left as the author intended.
1205   my $urls = '(' . join ('|', qw{
1206                 http
1207                 telnet
1208                 mailto
1209                 news
1210                 gopher
1211                 file
1212                 wais
1213                 ftp
1214             } )
1215         . ')';
1216
1217   my $ltrs = '\w';
1218   my $gunk = '/#~:.?+=&%@!\-';
1219   my $punc = '.:?\-';
1220   my $any  = "${ltrs}${gunk}${punc}";
1221
1222   $rest =~ s{
1223         \b                          # start at word boundary
1224         (                           # begin $1  {
1225           $urls     :               # need resource and a colon
1226           (?!:)                     # Ignore File::, among others.
1227           [$any] +?                 # followed by on or more
1228                                     #  of any valid character, but
1229                                     #  be conservative and take only
1230                                     #  what you need to....
1231         )                           # end   $1  }
1232         (?=                         # look-ahead non-consumptive assertion
1233                 [$punc]*            # either 0 or more puntuation
1234                 [^$any]             #   followed by a non-url char
1235             |                       # or else
1236                 $                   #   then end of the string
1237         )
1238       }{<A HREF="$1">$1</A>}igox;
1239
1240         $result =   "<PRE>"     # text should be as it is (verbatim)
1241                   . "$rest\n"
1242                   . "</PRE>\n";
1243     } else {                    # formatted text
1244         # parse through the string, stopping each time we find a
1245         # pod-escape.  once the string has been throughly processed
1246         # we can output it.
1247         while (length $rest) {
1248             # check to see if there are any possible pod directives in
1249             # the remaining part of the text.
1250             if ($rest =~ m/[BCEIFLSZ]</) {
1251                 warn "\$rest\t= $rest\n" unless
1252                     $rest =~ /\A
1253                            ([^<]*?)
1254                            ([BCEIFLSZ]?)
1255                            <
1256                            (.*)\Z/xs;
1257
1258                 $s1 = $1;       # pure text
1259                 $s2 = $2;       # the type of pod-escape that follows
1260                 $s3 = '<';      # '<'
1261                 $s4 = $3;       # the rest of the string
1262             } else {
1263                 $s1 = $rest;
1264                 $s2 = "";
1265                 $s3 = "";
1266                 $s4 = "";
1267             }
1268
1269             if ($s3 eq '<' && $s2) {    # a pod-escape
1270                 $result    .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1271                 $podcommand = "$s2<";
1272                 $rest       = $s4;
1273
1274                 # find the matching '>'
1275                 $match = 1;
1276                 $bf = 0;
1277                 while ($match && !$bf) {
1278                     $bf = 1;
1279                     if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1280                         $bf = 0;
1281                         $match++;
1282                         $podcommand .= $1;
1283                         $rest        = $2;
1284                     } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1285                         $bf = 0;
1286                         $match--;
1287                         $podcommand .= $1;
1288                         $rest        = $2;
1289                     }
1290                 }
1291
1292                 if ($match != 0) {
1293                     warn <<WARN;
1294 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1295 WARN
1296                     $result .= substr $podcommand, 0, 2;
1297                     $rest = substr($podcommand, 2) . $rest;
1298                     next;
1299                 }
1300
1301                 # pull out the parameters to the pod-escape
1302                 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1303                 $tag    = $1;
1304                 $params = $2;
1305
1306                 # process the text within the pod-escape so that any escapes
1307                 # which must occur do.
1308                 process_text(\$params, 0) unless $tag eq 'L';
1309
1310                 $s1 = $params;
1311                 if (!$tag || $tag eq " ") {     #  <> : no tag
1312                     $s1 = "&lt;$params&gt;";
1313                 } elsif ($tag eq "L") {         # L<> : link
1314                     $s1 = process_L($params);
1315                 } elsif ($tag eq "I" ||         # I<> : italicize text
1316                          $tag eq "B" ||         # B<> : bold text
1317                          $tag eq "F") {         # F<> : file specification
1318                     $s1 = process_BFI($tag, $params);
1319                 } elsif ($tag eq "C") {         # C<> : literal code
1320                     $s1 = process_C($params, 1);
1321                 } elsif ($tag eq "E") {         # E<> : escape
1322                     $s1 = process_E($params);
1323                 } elsif ($tag eq "Z") {         # Z<> : zero-width character
1324                     $s1 = process_Z($params);
1325                 } elsif ($tag eq "S") {         # S<> : non-breaking space
1326                     $s1 = process_S($params);
1327                 } elsif ($tag eq "X") {         # S<> : non-breaking space
1328                     $s1 = process_X($params);
1329                 } else {
1330                     warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1331                 }
1332
1333                 $result .= "$s1";
1334             } else {
1335                 # for pure text we must deal with implicit links and
1336                 # double-quotes among other things.
1337                 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1338                 $rest    = $s4;
1339             }
1340         }
1341     }
1342     $$text = $result;
1343 }
1344
1345 sub html_escape {
1346     my $rest = $_[0];
1347     $rest   =~ s/&(?!\w+;|#)/&amp;/g;   # XXX not bulletproof
1348     $rest   =~ s/</&lt;/g;
1349     $rest   =~ s/>/&gt;/g;
1350     $rest   =~ s/"/&quot;/g;
1351     return $rest;
1352 }
1353
1354 #
1355 # process_puretext - process pure text (without pod-escapes) converting
1356 #  double-quotes and handling implicit C<> links.
1357 #
1358 sub process_puretext {
1359     my($text, $quote) = @_;
1360     my(@words, $result, $rest, $lead, $trail);
1361
1362     # convert double-quotes to single-quotes
1363     $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1364     while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1365
1366     $$quote = ($text =~ m/"/ ? 1 : 0);
1367     $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1368
1369     # keep track of leading and trailing white-space
1370     $lead  = ($text =~ /\A(\s*)/s ? $1 : "");
1371     $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1372
1373     # collapse all white space into a single space
1374     $text =~ s/\s+/ /g;
1375     @words = split(" ", $text);
1376
1377     # process each word individually
1378     foreach my $word (@words) {
1379         # see if we can infer a link
1380         if ($word =~ /^\w+\(/) {
1381             # has parenthesis so should have been a C<> ref
1382             $word = process_C($word);
1383 #           $word =~ /^[^()]*]\(/;
1384 #           if (defined $items{$1} && $items{$1}) {
1385 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1386 #                       . htmlify(0,$word)
1387 #                       . "\">$word</A></CODE>";
1388 #           } elsif (defined $items{$word} && $items{$word}) {
1389 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1390 #                       . htmlify(0,$word)
1391 #                       . "\">$word</A></CODE>";
1392 #           } else {
1393 #               $word =   "\n<CODE><A HREF=\"#item_"
1394 #                       . htmlify(0,$word)
1395 #                       . "\">$word</A></CODE>";
1396 #           }
1397         } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1398             # perl variables, should be a C<> ref
1399             $word = process_C($word, 1);
1400         } elsif ($word =~ m,^\w+://\w,) {
1401             # looks like a URL
1402             # Don't relativize it: leave it as the author intended
1403             $word = qq(<A HREF="$word">$word</A>);
1404         } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1405             # looks like an e-mail address
1406             my ($w1, $w2, $w3) = ("", $word, "");
1407             ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1408             ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1409             $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1410         } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) {  # all uppercase?
1411             $word = html_escape($word) if $word =~ /["&<>]/;
1412             $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1413         } else {
1414             $word = html_escape($word) if $word =~ /["&<>]/;
1415         }
1416     }
1417
1418     # build a new string based upon our conversion
1419     $result = "";
1420     $rest   = join(" ", @words);
1421     while (length($rest) > 75) {
1422         if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1423              $rest =~ m/^(\S*)\s(.*?)$/o) {
1424
1425             $result .= "$1\n";
1426             $rest    = $2;
1427         } else {
1428             $result .= "$rest\n";
1429             $rest    = "";
1430         }
1431     }
1432     $result .= $rest if $rest;
1433
1434     # restore the leading and trailing white-space
1435     $result = "$lead$result$trail";
1436
1437     return $result;
1438 }
1439
1440 #
1441 # pre_escape - convert & in text to $amp;
1442 #
1443 sub pre_escape {
1444     my($str) = @_;
1445     $$str =~ s/&(?!\w+;|#)/&amp;/g;     # XXX not bulletproof
1446 }
1447
1448 #
1449 # dosify - convert filenames to 8.3
1450 #
1451 sub dosify {
1452     my($str) = @_;
1453     return lc($str) if $^O eq 'VMS';     # VMS just needs casing
1454     if ($Is83) {
1455         $str = lc $str;
1456         $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1457         $str =~ s/(\w+)/substr ($1,0,8)/ge;
1458     }
1459     return $str;
1460 }
1461
1462 #
1463 # process_L - convert a pod L<> directive to a corresponding HTML link.
1464 #  most of the links made are inferred rather than known about directly
1465 #  (i.e it's not known whether the =head\d section exists in the target file,
1466 #   or whether a .pod file exists in the case of split files).  however, the
1467 #  guessing usually works.
1468 #
1469 # Unlike the other directives, this should be called with an unprocessed
1470 # string, else tags in the link won't be matched.
1471 #
1472 sub process_L {
1473     my($str) = @_;
1474     my($s1, $s2, $linktext, $page, $page83, $section, $link);   # work strings
1475
1476     $str =~ s/\n/ /g;                   # undo word-wrapped tags
1477     $s1 = $str;
1478     for ($s1) {
1479         # LREF: a la HREF L<show this text|man/section>
1480         $linktext = $1 if s:^([^|]+)\|::;
1481
1482         # make sure sections start with a /
1483         s,^",/",g;
1484         s,^,/,g if (!m,/, && / /);
1485
1486         # check if there's a section specified
1487         if (m,^(.*?)/"?(.*?)"?$,) {     # yes
1488             ($page, $section) = ($1, $2);
1489         } else {                        # no
1490             ($page, $section) = ($str, "");
1491         }
1492
1493         # check if we know that this is a section in this page
1494         if (!defined $pages{$page} && defined $sections{$page}) {
1495             $section = $page;
1496             $page = "";
1497         }
1498
1499         # remove trailing punctuation, like ()
1500         $section =~ s/\W*$// ;
1501     }
1502
1503     $page83=dosify($page);
1504     $page=$page83 if (defined $pages{$page83});
1505     if ($page eq "") {
1506         $link = "#" . htmlify(0,$section);
1507         $linktext = $section unless defined($linktext);
1508     } elsif ( $page =~ /::/ ) {
1509         $linktext  = ($section ? "$section" : "$page");
1510         $page =~ s,::,/,g;
1511         # Search page cache for an entry keyed under the html page name,
1512         # then look to see what directory that page might be in.  NOTE:
1513         # this will only find one page. A better solution might be to produce
1514         # an intermediate page that is an index to all such pages.
1515         my $page_name = $page ;
1516         $page_name =~ s,^.*/,, ;
1517         if ( defined( $pages{ $page_name } ) &&
1518              $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1519            ) {
1520             $page = $1 ;
1521         }
1522         else {
1523             # NOTE: This branch assumes that all A::B pages are located in
1524             # $htmlroot/A/B.html . This is often incorrect, since they are
1525             # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1526             # analyze the contents of %pages and figure out where any
1527             # cousins of A::B are, then assume that.  So, if A::B isn't found,
1528             # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1529             # lib/A/B.pm. This is also limited, but it's an improvement.
1530             # Maybe a hints file so that the links point to the correct places
1531             # non-theless?
1532             # Also, maybe put a warn "$0: cannot resolve..." here.
1533         }
1534         $link = "$htmlroot/$page.html";
1535         $link .= "#" . htmlify(0,$section) if ($section);
1536     } elsif (!defined $pages{$page}) {
1537         warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n" unless $quiet;
1538         $link = "";
1539         $linktext = $page unless defined($linktext);
1540     } else {
1541         $linktext  = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1542         $section = htmlify(0,$section) if $section ne "";
1543
1544         # if there is a directory by the name of the page, then assume that an
1545         # appropriate section will exist in the subdirectory
1546 #       if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1547         if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
1548             $link = "$htmlroot/$1/$section.html";
1549
1550         # since there is no directory by the name of the page, the section will
1551         # have to exist within a .html of the same name.  thus, make sure there
1552         # is a .pod or .pm that might become that .html
1553         } else {
1554             $section = "#$section";
1555             # check if there is a .pod with the page name
1556             if ($pages{$page} =~ /([^:]*)\.pod:/) {
1557                 $link = "$htmlroot/$1.html$section";
1558             } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1559                 $link = "$htmlroot/$1.html$section";
1560             } else {
1561                 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1562                              "no .pod or .pm found\n";
1563                 $link = "";
1564                 $linktext = $section unless defined($linktext);
1565             }
1566         }
1567     }
1568
1569     process_text(\$linktext, 0);
1570     if ($link) {
1571         # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1572         # implies $htmlroot eq ''. This means that the link in question
1573         # needs a prefix of $htmldir if it begins with '/'. The test for
1574         # the initial '/' is done to avoid '#'-only links, and to allow
1575         # for other kinds of links, like file:, ftp:, etc.
1576         my $url ;
1577         if (  $htmlfileurl ne '' ) {
1578             $link = "$htmldir$link"
1579                 if ( $link =~ m{^/} ) ;
1580
1581             $url = relativize_url( $link, $htmlfileurl ) ;
1582 # print( "  b: [$link,$htmlfileurl,$url]\n" ) ;
1583         }
1584         else {
1585             $url = $link ;
1586         }
1587
1588         $s1 = "<A HREF=\"$url\">$linktext</A>";
1589     } else {
1590         $s1 = "<EM>$linktext</EM>";
1591     }
1592     return $s1;
1593 }
1594
1595 #
1596 # relativize_url - convert an absolute URL to one relative to a base URL.
1597 # Assumes both end in a filename.
1598 #
1599 sub relativize_url {
1600     my ($dest,$source) = @_ ;
1601
1602     my ($dest_volume,$dest_directory,$dest_file) =
1603         File::Spec::Unix->splitpath( $dest ) ;
1604     $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1605
1606     my ($source_volume,$source_directory,$source_file) =
1607         File::Spec::Unix->splitpath( $source ) ;
1608     $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1609
1610     my $rel_path = '' ;
1611     if ( $dest ne '' ) {
1612        $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1613     }
1614
1615     if ( $rel_path ne ''                &&
1616          substr( $rel_path, -1 ) ne '/' &&
1617          substr( $dest_file, 0, 1 ) ne '#'
1618         ) {
1619         $rel_path .= "/$dest_file" ;
1620     }
1621     else {
1622         $rel_path .= "$dest_file" ;
1623     }
1624
1625     return $rel_path ;
1626 }
1627
1628 #
1629 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1630 # convert them to corresponding HTML directives.
1631 #
1632 sub process_BFI {
1633     my($tag, $str) = @_;
1634     my($s1);                    # work string
1635     my(%repltext) = (   'B' => 'STRONG',
1636                         'F' => 'EM',
1637                         'I' => 'EM');
1638
1639     # extract the modified text and convert to HTML
1640     $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1641     return $s1;
1642 }
1643
1644 #
1645 # process_C - process the C<> pod-escape.
1646 #
1647 sub process_C {
1648     my($str, $doref) = @_;
1649     my($s1, $s2);
1650
1651     $s1 = $str;
1652     $s1 =~ s/\([^()]*\)//g;     # delete parentheses
1653     $s2 = $s1;
1654     $s1 =~ s/\W//g;             # delete bogus characters
1655     $str = html_escape($str);
1656
1657     # if there was a pod file that we found earlier with an appropriate
1658     # =item directive, then create a link to that page.
1659     if ($doref && defined $items{$s1}) {
1660         if ( $items{$s1} ) {
1661             my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ;
1662             # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1663             # implies $htmlroot eq ''.
1664             my $url ;
1665             if (  $htmlfileurl ne '' ) {
1666                 $link = "$htmldir$link" ;
1667                 $url = relativize_url( $link, $htmlfileurl ) ;
1668             }
1669             else {
1670                 $url = $link ;
1671             }
1672             $s1 = "<A HREF=\"$url\">$str</A>" ;
1673         }
1674         else {
1675             $s1 = "<A HREF=\"#item_" . htmlify(0,$s2) .  "\">$str</A>" ;
1676         }
1677         $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1678         confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1679     } else {
1680         $s1 = "<CODE>$str</CODE>";
1681         # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1682     }
1683
1684
1685     return $s1;
1686 }
1687
1688 #
1689 # process_E - process the E<> pod directive which seems to escape a character.
1690 #
1691 sub process_E {
1692     my($str) = @_;
1693
1694     for ($str) {
1695         s,([^/].*),\&$1\;,g;
1696     }
1697
1698     return $str;
1699 }
1700
1701 #
1702 # process_Z - process the Z<> pod directive which really just amounts to
1703 # ignoring it.  this allows someone to start a paragraph with an =
1704 #
1705 sub process_Z {
1706     my($str) = @_;
1707
1708     # there is no equivalent in HTML for this so just ignore it.
1709     $str = "";
1710     return $str;
1711 }
1712
1713 #
1714 # process_S - process the S<> pod directive which means to convert all
1715 # spaces in the string to non-breaking spaces (in HTML-eze).
1716 #
1717 sub process_S {
1718     my($str) = @_;
1719
1720     # convert all spaces in the text to non-breaking spaces in HTML.
1721     $str =~ s/ /&nbsp;/g;
1722     return $str;
1723 }
1724
1725 #
1726 # process_X - this is supposed to make an index entry.  we'll just
1727 # ignore it.
1728 #
1729 sub process_X {
1730     return '';
1731 }
1732
1733
1734 #
1735 # Adapted from Nick Ing-Simmons' PodToHtml package.
1736 sub relative_url {
1737     my $source_file = shift ;
1738     my $destination_file = shift;
1739
1740     my $source = URI::file->new_abs($source_file);
1741     my $uo = URI::file->new($destination_file,$source)->abs;
1742     return $uo->rel->as_string;
1743 }
1744
1745
1746 #
1747 # finish_list - finish off any pending HTML lists.  this should be called
1748 # after the entire pod file has been read and converted.
1749 #
1750 sub finish_list {
1751     while ($listlevel > 0) {
1752         print HTML "</DL>\n";
1753         $listlevel--;
1754     }
1755 }
1756
1757 #
1758 # htmlify - converts a pod section specification to a suitable section
1759 # specification for HTML.  if first arg is 1, only takes 1st word.
1760 #
1761 sub htmlify {
1762     my($compact, $heading) = @_;
1763
1764     if ($compact) {
1765       $heading =~ /^(\w+)/;
1766       $heading = $1;
1767     }
1768
1769   # $heading = lc($heading);
1770   $heading =~ s/[^\w\s]/_/g;
1771   $heading =~ s/(\s+)/ /g;
1772   $heading =~ s/^\s*(.*?)\s*$/$1/s;
1773   $heading =~ s/ /_/g;
1774   $heading =~ s/\A(.{32}).*\Z/$1/s;
1775   $heading =~ s/\s+\Z//;
1776   $heading =~ s/_{2,}/_/g;
1777
1778   return $heading;
1779 }
1780
1781 BEGIN {
1782 }
1783
1784 1;