lib/Pod/Html.pm

   1 package Pod::Html;
   2
   3 use Pod::Functions;
   4 use Getopt::Long;       # package for handling command-line parameters
   5 use File::Spec::Unix;
   6 require Exporter;
   7 use vars qw($VERSION);
   8 $VERSION = 1.02;
   9 @ISA = Exporter;
  10 @EXPORT = qw(pod2html htmlify);
  11 use Cwd;
  12
  13 use Carp;
  14
  15 use locale;     # make \w work right in non-ASCII lands
  16
  17 use strict;
  18
  19 use Config;
  20
  21 =head1 NAME
  22
  23 Pod::Html - module to convert pod files to HTML
  24
  25 =head1 SYNOPSIS
  26
  27     use Pod::Html;
  28     pod2html([options]);
  29
  30 =head1 DESCRIPTION
  31
  32 Converts files from pod format (see L<perlpod>) to HTML format.  It
  33 can automatically generate indexes and cross-references, and it keeps
  34 a cache of things it knows how to cross-reference.
  35
  36 =head1 ARGUMENTS
  37
  38 Pod::Html takes the following arguments:
  39
  40 =over 4
  41
  42 =item help
  43
  44     --help
  45
  46 Displays the usage message.
  47
  48 =item htmldir
  49
  50     --htmldir=name
  51
  52 Sets the directory in which the resulting HTML file is placed.  This
  53 is used to generate relative links to other files. Not passing this
  54 causes all links to be absolute, since this is the value that tells
  55 Pod::Html the root of the documentation tree.
  56
  57 =item htmlroot
  58
  59     --htmlroot=name
  60
  61 Sets the base URL for the HTML files.  When cross-references are made,
  62 the HTML root is prepended to the URL.
  63
  64 =item infile
  65
  66     --infile=name
  67
  68 Specify the pod file to convert.  Input is taken from STDIN if no
  69 infile is specified.
  70
  71 =item outfile
  72
  73     --outfile=name
  74
  75 Specify the HTML file to create.  Output goes to STDOUT if no outfile
  76 is specified.
  77
  78 =item podroot
  79
  80     --podroot=name
  81
  82 Specify the base directory for finding library pods.
  83
  84 =item podpath
  85
  86     --podpath=name:...:name
  87
  88 Specify which subdirectories of the podroot contain pod files whose
  89 HTML converted forms can be linked-to in cross-references.
  90
  91 =item libpods
  92
  93     --libpods=name:...:name
  94
  95 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
  96
  97 =item netscape
  98
  99     --netscape
 100
 101 Use Netscape HTML directives when applicable.
 102
 103 =item nonetscape
 104
 105     --nonetscape
 106
 107 Do not use Netscape HTML directives (default).
 108
 109 =item index
 110
 111     --index
 112
 113 Generate an index at the top of the HTML file (default behaviour).
 114
 115 =item noindex
 116
 117     --noindex
 118
 119 Do not generate an index at the top of the HTML file.
 120
 121
 122 =item recurse
 123
 124     --recurse
 125
 126 Recurse into subdirectories specified in podpath (default behaviour).
 127
 128 =item norecurse
 129
 130     --norecurse
 131
 132 Do not recurse into subdirectories specified in podpath.
 133
 134 =item title
 135
 136     --title=title
 137
 138 Specify the title of the resulting HTML file.
 139
 140 =item css
 141
 142     --css=stylesheet
 143
 144 Specify the URL of a cascading style sheet.
 145
 146 =item verbose
 147
 148     --verbose
 149
 150 Display progress messages.
 151
 152 =item quiet
 153
 154     --quiet
 155
 156 Don't display I<mostly harmless> warning messages.
 157
 158 =back
 159
 160 =head1 EXAMPLE
 161
 162     pod2html("pod2html",
 163              "--podpath=lib:ext:pod:vms",
 164              "--podroot=/usr/src/perl",
 165              "--htmlroot=/perl/nmanual",
 166              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
 167              "--recurse",
 168              "--infile=foo.pod",
 169              "--outfile=/perl/nmanual/foo.html");
 170
 171 =head1 ENVIRONMENT
 172
 173 Uses $Config{pod2html} to setup default options.
 174
 175 =head1 AUTHOR
 176
 177 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 178
 179 =head1 BUGS
 180
 181 Has trouble with C<> etc in = commands.
 182
 183 =head1 SEE ALSO
 184
 185 L<perlpod>
 186
 187 =head1 COPYRIGHT
 188
 189 This program is distributed under the Artistic License.
 190
 191 =cut
 192
 193 my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
 194 my $dircache = "pod2htmd$cache_ext";
 195 my $itemcache = "pod2htmi$cache_ext";
 196
 197 my @begin_stack = ();           # begin/end stack
 198
 199 my @libpods = ();               # files to search for links from C<> directives
 200 my $htmlroot = "/";             # http-server base directory from which all
 201                                 #   relative paths in $podpath stem.
 202 my $htmldir = "";               # The directory to which the html pages
 203                                 # will (eventually) be written.
 204 my $htmlfile = "";              # write to stdout by default
 205 my $htmlfileurl = "" ;          # The url that other files would use to
 206                                 # refer to this file.  This is only used
 207                                 # to make relative urls that point to
 208                                 # other files.
 209 my $podfile = "";               # read from stdin by default
 210 my @podpath = ();               # list of directories containing library pods.
 211 my $podroot = ".";              # filesystem base directory from which all
 212                                 #   relative paths in $podpath stem.
 213 my $css = '';                   # Cascading style sheet
 214 my $recurse = 1;                # recurse on subdirectories in $podpath.
 215 my $quiet = 0;                  # not quiet by default
 216 my $verbose = 0;                # not verbose by default
 217 my $doindex = 1;                # non-zero if we should generate an index
 218 my $listlevel = 0;              # current list depth
 219 my @listitem = ();              # stack of HTML commands to use when a =item is
 220                                 #   encountered.  the top of the stack is the
 221                                 #   current list.
 222 my @listdata = ();              # similar to @listitem, but for the text after
 223                                 #   an =item
 224 my @listend = ();               # similar to @listitem, but the text to use to
 225                                 #   end the list.
 226 my $ignore = 1;                 # whether or not to format text.  we don't
 227                                 #   format text until we hit our first pod
 228                                 #   directive.
 229
 230 my %items_named = ();           # for the multiples of the same item in perlfunc
 231 my @items_seen = ();
 232 my $netscape = 0;               # whether or not to use netscape directives.
 233 my $title;                      # title to give the pod(s)
 234 my $header = 0;                 # produce block header/footer
 235 my $top = 1;                    # true if we are at the top of the doc.  used
 236                                 #   to prevent the first <HR> directive.
 237 my $paragraph;                  # which paragraph we're processing (used
 238                                 #   for error messages)
 239 my %pages = ();                 # associative array used to find the location
 240                                 #   of pages referenced by L<> links.
 241 my %sections = ();              # sections within this page
 242 my %items = ();                 # associative array used to find the location
 243                                 #   of =item directives referenced by C<> links
 244 my $Is83;                       # is dos with short filenames (8.3)
 245
 246 sub init_globals {
 247 $dircache = "pod2htmd$cache_ext";
 248 $itemcache = "pod2htmi$cache_ext";
 249
 250 @begin_stack = ();              # begin/end stack
 251
 252 @libpods = ();          # files to search for links from C<> directives
 253 $htmlroot = "/";                # http-server base directory from which all
 254                                 #   relative paths in $podpath stem.
 255 $htmlfile = "";         # write to stdout by default
 256 $podfile = "";          # read from stdin by default
 257 @podpath = ();          # list of directories containing library pods.
 258 $podroot = ".";         # filesystem base directory from which all
 259                                 #   relative paths in $podpath stem.
 260 $css = '';                   # Cascading style sheet
 261 $recurse = 1;           # recurse on subdirectories in $podpath.
 262 $quiet = 0;             # not quiet by default
 263 $verbose = 0;           # not verbose by default
 264 $doindex = 1;                   # non-zero if we should generate an index
 265 $listlevel = 0;         # current list depth
 266 @listitem = ();         # stack of HTML commands to use when a =item is
 267                                 #   encountered.  the top of the stack is the
 268                                 #   current list.
 269 @listdata = ();         # similar to @listitem, but for the text after
 270                                 #   an =item
 271 @listend = ();          # similar to @listitem, but the text to use to
 272                                 #   end the list.
 273 $ignore = 1;                    # whether or not to format text.  we don't
 274                                 #   format text until we hit our first pod
 275                                 #   directive.
 276
 277 @items_seen = ();
 278 %items_named = ();
 279 $netscape = 0;          # whether or not to use netscape directives.
 280 $header = 0;                    # produce block header/footer
 281 $title = '';                    # title to give the pod(s)
 282 $top = 1;                       # true if we are at the top of the doc.  used
 283                                 #   to prevent the first <HR> directive.
 284 $paragraph = '';                        # which paragraph we're processing (used
 285                                 #   for error messages)
 286 %sections = ();         # sections within this page
 287
 288 # These are not reinitialised here but are kept as a cache.
 289 # See get_cache and related cache management code.
 290 #%pages = ();                   # associative array used to find the location
 291                                 #   of pages referenced by L<> links.
 292 #%items = ();                   # associative array used to find the location
 293                                 #   of =item directives referenced by C<> links
 294 $Is83=$^O eq 'dos';
 295 }
 296
 297 sub pod2html {
 298     local(@ARGV) = @_;
 299     local($/);
 300     local $_;
 301
 302     init_globals();
 303
 304     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 305
 306     # cache of %pages and %items from last time we ran pod2html
 307
 308     #undef $opt_help if defined $opt_help;
 309
 310     # parse the command-line parameters
 311     parse_command_line();
 312
 313     # set some variables to their default values if necessary
 314     local *POD;
 315     unless (@ARGV && $ARGV[0]) {
 316         $podfile  = "-" unless $podfile;        # stdin
 317         open(POD, "<$podfile")
 318                 || die "$0: cannot open $podfile file for input: $!\n";
 319     } else {
 320         $podfile = $ARGV[0];  # XXX: might be more filenames
 321         *POD = *ARGV;
 322     }
 323     $htmlfile = "-" unless $htmlfile;   # stdout
 324     $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
 325     $htmldir =~ s#/$## ;                # so we don't get a //
 326     if (  $htmlroot eq ''
 327        && defined( $htmldir )
 328        && $htmldir ne ''
 329        && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
 330        )
 331     {
 332         # Set the 'base' url for this file, so that we can use it
 333         # as the location from which to calculate relative links
 334         # to other files. If this is '', then absolute links will
 335         # be used throughout.
 336         $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
 337     }
 338
 339     # read the pod a paragraph at a time
 340     warn "Scanning for sections in input file(s)\n" if $verbose;
 341     $/ = "";
 342     my @poddata  = <POD>;
 343     close(POD);
 344
 345     # scan the pod for =head[1-6] directives and build an index
 346     my $index = scan_headings(\%sections, @poddata);
 347
 348     unless($index) {
 349         warn "No headings in $podfile\n" if $verbose;
 350     }
 351
 352     # open the output file
 353     open(HTML, ">$htmlfile")
 354             || die "$0: cannot open $htmlfile file for output: $!\n";
 355
 356     # put a title in the HTML file if one wasn't specified
 357     if ($title eq '') {
 358         TITLE_SEARCH: {
 359             for (my $i = 0; $i < @poddata; $i++) {
 360                 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 361                     for my $para ( @poddata[$i, $i+1] ) {
 362                         last TITLE_SEARCH
 363                             if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
 364                     }
 365                 }
 366
 367             }
 368         }
 369     }
 370     if (!$title and $podfile =~ /\.pod$/) {
 371         # probably a split pod so take first =head[12] as title
 372         for (my $i = 0; $i < @poddata; $i++) {
 373             last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 374         }
 375         warn "adopted '$title' as title for $podfile\n"
 376             if $verbose and $title;
 377     }
 378     if ($title) {
 379         $title =~ s/\s*\(.*\)//;
 380     } else {
 381         warn "$0: no title for $podfile" unless $quiet;
 382         $podfile =~ /^(.*)(\.[^.\/]+)?$/;
 383         $title = ($podfile eq "-" ? 'No Title' : $1);
 384         warn "using $title" if $verbose;
 385     }
 386     my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
 387     $csslink =~ s,\\,/,g;
 388     $csslink =~ s,(/.):,$1|,;
 389
 390     my $block = $header ? <<END_OF_BLOCK : '';
 391 <TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
 392 <TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
 393 <FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
 394 </TD></TR>
 395 </TABLE>
 396 END_OF_BLOCK
 397
 398     print HTML <<END_OF_HEAD;
 399 <HTML>
 400 <HEAD>
 401 <TITLE>$title</TITLE>$csslink
 402 <LINK REV="made" HREF="mailto:$Config{perladmin}">
 403 </HEAD>
 404
 405 <BODY>
 406 $block
 407 END_OF_HEAD
 408
 409     # load/reload/validate/cache %pages and %items
 410     get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
 411
 412     # scan the pod for =item directives
 413     scan_items("", \%items, @poddata);
 414
 415     # put an index at the top of the file.  note, if $doindex is 0 we
 416     # still generate an index, but surround it with an html comment.
 417     # that way some other program can extract it if desired.
 418     $index =~ s/--+/-/g;
 419     print HTML "<!-- INDEX BEGIN -->\n";
 420     print HTML "<!--\n" unless $doindex;
 421     print HTML $index;
 422     print HTML "-->\n" unless $doindex;
 423     print HTML "<!-- INDEX END -->\n\n";
 424     print HTML "<HR>\n" if $doindex and $index;
 425
 426     # now convert this file
 427     warn "Converting input file\n" if $verbose;
 428     foreach my $i (0..$#poddata) {
 429         $_ = $poddata[$i];
 430         $paragraph = $i+1;
 431         if (/^(=.*)/s) {        # is it a pod directive?
 432             $ignore = 0;
 433             $_ = $1;
 434             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 435                 process_begin($1, $2);
 436             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 437                 process_end($1, $2);
 438             } elsif (/^=cut/) {                 # =cut
 439                 process_cut();
 440             } elsif (/^=pod/) {                 # =pod
 441                 process_pod();
 442             } else {
 443                 next if @begin_stack && $begin_stack[-1] ne 'html';
 444
 445                 if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
 446                     process_head($1, $2);
 447                 } elsif (/^=item\s*(.*\S)/sm) { # =item text
 448                     process_item($1);
 449                 } elsif (/^=over\s*(.*)/) {             # =over N
 450                     process_over();
 451                 } elsif (/^=back/) {            # =back
 452                     process_back();
 453                 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
 454                     process_for($1,$2);
 455                 } else {
 456                     /^=(\S*)\s*/;
 457                     warn "$0: $podfile: unknown pod directive '$1' in "
 458                        . "paragraph $paragraph.  ignoring.\n";
 459                 }
 460             }
 461             $top = 0;
 462         }
 463         else {
 464             next if $ignore;
 465             next if @begin_stack && $begin_stack[-1] ne 'html';
 466             my $text = $_;
 467             process_text(\$text, 1);
 468             print HTML "<P>\n$text</P>\n";
 469         }
 470     }
 471
 472     # finish off any pending directives
 473     finish_list();
 474     print HTML <<END_OF_TAIL;
 475 $block
 476 </BODY>
 477
 478 </HTML>
 479 END_OF_TAIL
 480
 481     # close the html file
 482     close(HTML);
 483
 484     warn "Finished\n" if $verbose;
 485 }
 486
 487 ##############################################################################
 488
 489 my $usage;                      # see below
 490 sub usage {
 491     my $podfile = shift;
 492     warn "$0: $podfile: @_\n" if @_;
 493     die $usage;
 494 }
 495
 496 $usage =<<END_OF_USAGE;
 497 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 498            --podpath=<name>:...:<name> --podroot=<name>
 499            --libpods=<name>:...:<name> --recurse --verbose --index
 500            --netscape --norecurse --noindex
 501
 502   --flush      - flushes the item and directory caches.
 503   --help       - prints this message.
 504   --htmlroot   - http-server base directory from which all relative paths
 505                  in podpath stem (default is /).
 506   --index      - generate an index at the top of the resulting html
 507                  (default).
 508   --infile     - filename for the pod to convert (input taken from stdin
 509                  by default).
 510   --libpods    - colon-separated list of pages to search for =item pod
 511                  directives in as targets of C<> and implicit links (empty
 512                  by default).  note, these are not filenames, but rather
 513                  page names like those that appear in L<> links.
 514   --netscape   - will use netscape html directives when applicable.
 515   --nonetscape - will not use netscape directives (default).
 516   --outfile    - filename for the resulting html file (output sent to
 517                  stdout by default).
 518   --podpath    - colon-separated list of directories containing library
 519                  pods.  empty by default.
 520   --podroot    - filesystem base directory from which all relative paths
 521                  in podpath stem (default is .).
 522   --noindex    - don't generate an index at the top of the resulting html.
 523   --norecurse  - don't recurse on those subdirectories listed in podpath.
 524   --recurse    - recurse on those subdirectories listed in podpath
 525                  (default behavior).
 526   --title      - title that will appear in resulting html file.
 527   --header     - produce block header/footer
 528   --css        - stylesheet URL
 529   --verbose    - self-explanatory
 530   --quiet      - supress some benign warning messages
 531
 532 END_OF_USAGE
 533
 534 sub parse_command_line {
 535     my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
 536     unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
 537     my $result = GetOptions(
 538                             'flush'      => \$opt_flush,
 539                             'help'       => \$opt_help,
 540                             'htmldir=s'  => \$opt_htmldir,
 541                             'htmlroot=s' => \$opt_htmlroot,
 542                             'index!'     => \$opt_index,
 543                             'infile=s'   => \$opt_infile,
 544                             'libpods=s'  => \$opt_libpods,
 545                             'netscape!'  => \$opt_netscape,
 546                             'outfile=s'  => \$opt_outfile,
 547                             'podpath=s'  => \$opt_podpath,
 548                             'podroot=s'  => \$opt_podroot,
 549                             'norecurse'  => \$opt_norecurse,
 550                             'recurse!'   => \$opt_recurse,
 551                             'title=s'    => \$opt_title,
 552                             'header'     => \$opt_header,
 553                             'css=s'      => \$opt_css,
 554                             'verbose'    => \$opt_verbose,
 555                             'quiet'      => \$opt_quiet,
 556                            );
 557     usage("-", "invalid parameters") if not $result;
 558
 559     usage("-") if defined $opt_help;    # see if the user asked for help
 560     $opt_help = "";                     # just to make -w shut-up.
 561
 562     $podfile  = $opt_infile if defined $opt_infile;
 563     $htmlfile = $opt_outfile if defined $opt_outfile;
 564     $htmldir  = $opt_htmldir if defined $opt_outfile;
 565
 566     @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 567     @libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 568
 569     warn "Flushing item and directory caches\n"
 570         if $opt_verbose && defined $opt_flush;
 571     unlink($dircache, $itemcache) if defined $opt_flush;
 572
 573     $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 574     $podroot  = $opt_podroot if defined $opt_podroot;
 575
 576     $doindex  = $opt_index if defined $opt_index;
 577     $recurse  = $opt_recurse if defined $opt_recurse;
 578     $title    = $opt_title if defined $opt_title;
 579     $header   = defined $opt_header ? 1 : 0;
 580     $css      = $opt_css if defined $opt_css;
 581     $verbose  = defined $opt_verbose ? 1 : 0;
 582     $quiet    = defined $opt_quiet ? 1 : 0;
 583     $netscape = $opt_netscape if defined $opt_netscape;
 584 }
 585
 586
 587 my $saved_cache_key;
 588
 589 sub get_cache {
 590     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 591     my @cache_key_args = @_;
 592
 593     # A first-level cache:
 594     # Don't bother reading the cache files if they still apply
 595     # and haven't changed since we last read them.
 596
 597     my $this_cache_key = cache_key(@cache_key_args);
 598
 599     return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
 600
 601     # load the cache of %pages and %items if possible.  $tests will be
 602     # non-zero if successful.
 603     my $tests = 0;
 604     if (-f $dircache && -f $itemcache) {
 605         warn "scanning for item cache\n" if $verbose;
 606         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 607     }
 608
 609     # if we didn't succeed in loading the cache then we must (re)build
 610     #  %pages and %items.
 611     if (!$tests) {
 612         warn "scanning directories in pod-path\n" if $verbose;
 613         scan_podpath($podroot, $recurse, 0);
 614     }
 615     $saved_cache_key = cache_key(@cache_key_args);
 616 }
 617
 618 sub cache_key {
 619     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 620     return join('!', $dircache, $itemcache, $recurse,
 621         @$podpath, $podroot, stat($dircache), stat($itemcache));
 622 }
 623
 624 #
 625 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 626 #  are valid caches of %pages and %items.  if they are valid then it loads
 627 #  them and returns a non-zero value.
 628 #
 629
 630 sub load_cache {
 631     my($dircache, $itemcache, $podpath, $podroot) = @_;
 632     my($tests);
 633     local $_;
 634
 635     $tests = 0;
 636
 637     open(CACHE, "<$itemcache") ||
 638         die "$0: error opening $itemcache for reading: $!\n";
 639     $/ = "\n";
 640
 641     # is it the same podpath?
 642     $_ = <CACHE>;
 643     chomp($_);
 644     $tests++ if (join(":", @$podpath) eq $_);
 645
 646     # is it the same podroot?
 647     $_ = <CACHE>;
 648     chomp($_);
 649     $tests++ if ($podroot eq $_);
 650
 651     # load the cache if its good
 652     if ($tests != 2) {
 653         close(CACHE);
 654         return 0;
 655     }
 656
 657     warn "loading item cache\n" if $verbose;
 658     while (<CACHE>) {
 659         /(.*?) (.*)$/;
 660         $items{$1} = $2;
 661     }
 662     close(CACHE);
 663
 664     warn "scanning for directory cache\n" if $verbose;
 665     open(CACHE, "<$dircache") ||
 666         die "$0: error opening $dircache for reading: $!\n";
 667     $/ = "\n";
 668     $tests = 0;
 669
 670     # is it the same podpath?
 671     $_ = <CACHE>;
 672     chomp($_);
 673     $tests++ if (join(":", @$podpath) eq $_);
 674
 675     # is it the same podroot?
 676     $_ = <CACHE>;
 677     chomp($_);
 678     $tests++ if ($podroot eq $_);
 679
 680     # load the cache if its good
 681     if ($tests != 2) {
 682         close(CACHE);
 683         return 0;
 684     }
 685
 686     warn "loading directory cache\n" if $verbose;
 687     while (<CACHE>) {
 688         /(.*?) (.*)$/;
 689         $pages{$1} = $2;
 690     }
 691
 692     close(CACHE);
 693
 694     return 1;
 695 }
 696
 697 #
 698 # scan_podpath - scans the directories specified in @podpath for directories,
 699 #  .pod files, and .pm files.  it also scans the pod files specified in
 700 #  @libpods for =item directives.
 701 #
 702 sub scan_podpath {
 703     my($podroot, $recurse, $append) = @_;
 704     my($pwd, $dir);
 705     my($libpod, $dirname, $pod, @files, @poddata);
 706
 707     unless($append) {
 708         %items = ();
 709         %pages = ();
 710     }
 711
 712     # scan each directory listed in @podpath
 713     $pwd = getcwd();
 714     chdir($podroot)
 715         || die "$0: error changing to directory $podroot: $!\n";
 716     foreach $dir (@podpath) {
 717         scan_dir($dir, $recurse);
 718     }
 719
 720     # scan the pods listed in @libpods for =item directives
 721     foreach $libpod (@libpods) {
 722         # if the page isn't defined then we won't know where to find it
 723         # on the system.
 724         next unless defined $pages{$libpod} && $pages{$libpod};
 725
 726         # if there is a directory then use the .pod and .pm files within it.
 727         # NOTE: Only finds the first so-named directory in the tree.
 728 #       if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 729         if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
 730             #  find all the .pod and .pm files within the directory
 731             $dirname = $1;
 732             opendir(DIR, $dirname) ||
 733                 die "$0: error opening directory $dirname: $!\n";
 734             @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
 735             closedir(DIR);
 736
 737             # scan each .pod and .pm file for =item directives
 738             foreach $pod (@files) {
 739                 open(POD, "<$dirname/$pod") ||
 740                     die "$0: error opening $dirname/$pod for input: $!\n";
 741                 @poddata = <POD>;
 742                 close(POD);
 743
 744                 scan_items("$dirname/$pod", @poddata);
 745             }
 746
 747             # use the names of files as =item directives too.
 748             foreach $pod (@files) {
 749                 $pod =~ /^(.*)(\.pod|\.pm)$/;
 750                 $items{$1} = "$dirname/$1.html" if $1;
 751             }
 752         } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
 753                  $pages{$libpod} =~ /([^:]*\.pm):/) {
 754             # scan the .pod or .pm file for =item directives
 755             $pod = $1;
 756             open(POD, "<$pod") ||
 757                 die "$0: error opening $pod for input: $!\n";
 758             @poddata = <POD>;
 759             close(POD);
 760
 761             scan_items("$pod", @poddata);
 762         } else {
 763             warn "$0: shouldn't be here (line ".__LINE__."\n";
 764         }
 765     }
 766     @poddata = ();      # clean-up a bit
 767
 768     chdir($pwd)
 769         || die "$0: error changing to directory $pwd: $!\n";
 770
 771     # cache the item list for later use
 772     warn "caching items for later use\n" if $verbose;
 773     open(CACHE, ">$itemcache") ||
 774         die "$0: error open $itemcache for writing: $!\n";
 775
 776     print CACHE join(":", @podpath) . "\n$podroot\n";
 777     foreach my $key (keys %items) {
 778         print CACHE "$key $items{$key}\n";
 779     }
 780
 781     close(CACHE);
 782
 783     # cache the directory list for later use
 784     warn "caching directories for later use\n" if $verbose;
 785     open(CACHE, ">$dircache") ||
 786         die "$0: error open $dircache for writing: $!\n";
 787
 788     print CACHE join(":", @podpath) . "\n$podroot\n";
 789     foreach my $key (keys %pages) {
 790         print CACHE "$key $pages{$key}\n";
 791     }
 792
 793     close(CACHE);
 794 }
 795
 796 #
 797 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 798 #  files, and .pm files.  notes those that it finds.  this information will
 799 #  be used later in order to figure out where the pages specified in L<>
 800 #  links are on the filesystem.
 801 #
 802 sub scan_dir {
 803     my($dir, $recurse) = @_;
 804     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 805     local $_;
 806
 807     @subdirs = ();
 808     @pods = ();
 809
 810     opendir(DIR, $dir) ||
 811         die "$0: error opening directory $dir: $!\n";
 812     while (defined($_ = readdir(DIR))) {
 813         if (-d "$dir/$_" && $_ ne "." && $_ ne "..") {      # directory
 814             $pages{$_}  = "" unless defined $pages{$_};
 815             $pages{$_} .= "$dir/$_:";
 816             push(@subdirs, $_);
 817         } elsif (/\.pod$/) {                                # .pod
 818             s/\.pod$//;
 819             $pages{$_}  = "" unless defined $pages{$_};
 820             $pages{$_} .= "$dir/$_.pod:";
 821             push(@pods, "$dir/$_.pod");
 822         } elsif (/\.pm$/) {                                 # .pm
 823             s/\.pm$//;
 824             $pages{$_}  = "" unless defined $pages{$_};
 825             $pages{$_} .= "$dir/$_.pm:";
 826             push(@pods, "$dir/$_.pm");
 827         }
 828     }
 829     closedir(DIR);
 830
 831     # recurse on the subdirectories if necessary
 832     if ($recurse) {
 833         foreach my $subdir (@subdirs) {
 834             scan_dir("$dir/$subdir", $recurse);
 835         }
 836     }
 837 }
 838
 839 #
 840 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
 841 #  build an index.
 842 #
 843 sub scan_headings {
 844     my($sections, @data) = @_;
 845     my($tag, $which_head, $title, $listdepth, $index);
 846
 847     # here we need      local $ignore = 0;
 848     #  unfortunately, we can't have it, because $ignore is lexical
 849     $ignore = 0;
 850
 851     $listdepth = 0;
 852     $index = "";
 853
 854     # scan for =head directives, note their name, and build an index
 855     #  pointing to each of them.
 856     foreach my $line (@data) {
 857         if ($line =~ /^=(head)([1-6])\s+(.*)/) {
 858             ($tag,$which_head, $title) = ($1,$2,$3);
 859             chomp($title);
 860             $$sections{htmlify(0,$title)} = 1;
 861
 862             while ($which_head != $listdepth) {
 863                 if ($which_head > $listdepth) {
 864                     $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
 865                     $listdepth++;
 866                 } elsif ($which_head < $listdepth) {
 867                     $listdepth--;
 868                     $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 869                 }
 870             }
 871
 872             $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
 873                       "<A HREF=\"#" . htmlify(0,$title) . "\">" .
 874                       html_escape(process_text(\$title, 0)) . "</A></LI>";
 875         }
 876     }
 877
 878     # finish off the lists
 879     while ($listdepth--) {
 880         $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
 881     }
 882
 883     # get rid of bogus lists
 884     $index =~ s,\t*<UL>\s*</UL>\n,,g;
 885
 886     $ignore = 1;        # restore old value;
 887
 888     return $index;
 889 }
 890
 891 #
 892 # scan_items - scans the pod specified by $pod for =item directives.  we
 893 #  will use this information later on in resolving C<> links.
 894 #
 895 sub scan_items {
 896     my($pod, @poddata) = @_;
 897     my($i, $item);
 898     local $_;
 899
 900     $pod =~ s/\.pod$//;
 901     $pod .= ".html" if $pod;
 902
 903     foreach $i (0..$#poddata) {
 904         $_ = $poddata[$i];
 905
 906         # remove any formatting instructions
 907         s,[A-Z]<([^<>]*)>,$1,g;
 908
 909         # figure out what kind of item it is and get the first word of
 910         #  it's name.
 911         if (/^=item\s+(\w*)\s*.*$/s) {
 912             if ($1 eq "*") {            # bullet list
 913                 /\A=item\s+\*\s*(.*?)\s*\Z/s;
 914                 $item = $1;
 915             } elsif ($1 =~ /^\d+/) {    # numbered list
 916                 /\A=item\s+\d+\.?(.*?)\s*\Z/s;
 917                 $item = $1;
 918             } else {
 919 #               /\A=item\s+(.*?)\s*\Z/s;
 920                 /\A=item\s+(\w*)/s;
 921                 $item = $1;
 922             }
 923
 924             $items{$item} = "$pod" if $item;
 925         }
 926     }
 927 }
 928
 929 #
 930 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
 931 #
 932 sub process_head {
 933     my($tag, $heading) = @_;
 934     my $firstword;
 935
 936     # figure out the level of the =head
 937     $tag =~ /head([1-6])/;
 938     my $level = $1;
 939
 940     # can't have a heading full of spaces and speechmarks and so on
 941     $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
 942
 943     print HTML "<P>\n" unless $listlevel;
 944     print HTML "<HR>\n" unless $listlevel || $top;
 945     print HTML "<H$level>"; # unless $listlevel;
 946     #print HTML "<H$level>" unless $listlevel;
 947     my $convert = $heading; process_text(\$convert, 0);
 948     $convert = html_escape($convert);
 949     print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
 950     print HTML "</H$level>"; # unless $listlevel;
 951     print HTML "\n";
 952 }
 953
 954 #
 955 # process_item - convert a pod item tag and convert it to HTML format.
 956 #
 957 sub process_item {
 958     my $text = $_[0];
 959     my($i, $quote, $name);
 960
 961     my $need_preamble = 0;
 962     my $this_entry;
 963
 964
 965     # lots of documents start a list without doing an =over.  this is
 966     # bad!  but, the proper thing to do seems to be to just assume
 967     # they did do an =over.  so warn them once and then continue.
 968     warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n"
 969         unless $listlevel;
 970     process_over() unless $listlevel;
 971
 972     return unless $listlevel;
 973
 974     # remove formatting instructions from the text
 975     1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
 976     pre_escape(\$text);
 977
 978     $need_preamble = $items_seen[$listlevel]++ == 0;
 979
 980     # check if this is the first =item after an =over
 981     $i = $listlevel - 1;
 982     my $need_new = $listlevel >= @listitem;
 983
 984     if ($text =~ /\A\*/) {              # bullet
 985
 986         if ($need_preamble) {
 987             push(@listend,  "</UL>");
 988             print HTML "<UL>\n";
 989         }
 990
 991         print HTML '<LI>';
 992         if ($text =~ /\A\*\s*(.+)\Z/s) {
 993             print HTML '<STRONG>';
 994             if ($items_named{$1}++) {
 995                 print HTML html_escape($1);
 996             } else {
 997                 my $name = 'item_' . htmlify(1,$1);
 998                 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
 999             }
1000             print HTML '</STRONG>';
1001         }
1002
1003     } elsif ($text =~ /\A[\d#]+/) {     # numbered list
1004
1005         if ($need_preamble) {
1006             push(@listend,  "</OL>");
1007             print HTML "<OL>\n";
1008         }
1009
1010         print HTML '<LI>';
1011         if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
1012             print HTML '<STRONG>';
1013             if ($items_named{$1}++) {
1014                 print HTML html_escape($1);
1015             } else {
1016                 my $name = 'item_' . htmlify(0,$1);
1017                 print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
1018             }
1019             print HTML '</STRONG>';
1020         }
1021
1022     } else {                    # all others
1023
1024         if ($need_preamble) {
1025             push(@listend,  '</DL>');
1026             print HTML "<DL>\n";
1027         }
1028
1029         print HTML '<DT>';
1030         if ($text =~ /(\S+)/) {
1031             print HTML '<STRONG>';
1032             if ($items_named{$1}++) {
1033                 print HTML html_escape($text);
1034             } else {
1035                 my $name = 'item_' . htmlify(1,$text);
1036                 print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
1037             }
1038             print HTML '</STRONG>';
1039         }
1040        print HTML '<DD>';
1041     }
1042
1043     print HTML "\n";
1044 }
1045
1046 #
1047 # process_over - process a pod over tag and start a corresponding HTML
1048 # list.
1049 #
1050 sub process_over {
1051     # start a new list
1052     $listlevel++;
1053 }
1054
1055 #
1056 # process_back - process a pod back tag and convert it to HTML format.
1057 #
1058 sub process_back {
1059     warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n"
1060         unless $listlevel;
1061     return unless $listlevel;
1062
1063     # close off the list.  note, I check to see if $listend[$listlevel] is
1064     # defined because an =item directive may have never appeared and thus
1065     # $listend[$listlevel] may have never been initialized.
1066     $listlevel--;
1067     print HTML $listend[$listlevel] if defined $listend[$listlevel];
1068     print HTML "\n";
1069
1070     # don't need the corresponding perl code anymore
1071     pop(@listitem);
1072     pop(@listdata);
1073     pop(@listend);
1074
1075     pop(@items_seen);
1076 }
1077
1078 #
1079 # process_cut - process a pod cut tag, thus stop ignoring pod directives.
1080 #
1081 sub process_cut {
1082     $ignore = 1;
1083 }
1084
1085 #
1086 # process_pod - process a pod pod tag, thus ignore pod directives until we see a
1087 # corresponding cut.
1088 #
1089 sub process_pod {
1090     # no need to set $ignore to 0 cause the main loop did it
1091 }
1092
1093 #
1094 # process_for - process a =for pod tag.  if it's for html, split
1095 # it out verbatim, if illustration, center it, otherwise ignore it.
1096 #
1097 sub process_for {
1098     my($whom, $text) = @_;
1099     if ( $whom =~ /^(pod2)?html$/i) {
1100         print HTML $text;
1101     } elsif ($whom =~ /^illustration$/i) {
1102         1 while chomp $text;
1103         for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1104           $text .= $ext, last if -r "$text$ext";
1105         }
1106         print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1107     }
1108 }
1109
1110 #
1111 # process_begin - process a =begin pod tag.  this pushes
1112 # whom we're beginning on the begin stack.  if there's a
1113 # begin stack, we only print if it us.
1114 #
1115 sub process_begin {
1116     my($whom, $text) = @_;
1117     $whom = lc($whom);
1118     push (@begin_stack, $whom);
1119     if ( $whom =~ /^(pod2)?html$/) {
1120         print HTML $text if $text;
1121     }
1122 }
1123
1124 #
1125 # process_end - process a =end pod tag.  pop the
1126 # begin stack.  die if we're mismatched.
1127 #
1128 sub process_end {
1129     my($whom, $text) = @_;
1130     $whom = lc($whom);
1131     if ($begin_stack[-1] ne $whom ) {
1132         die "Unmatched begin/end at chunk $paragraph\n"
1133     }
1134     pop @begin_stack;
1135 }
1136
1137 #
1138 # process_text - handles plaintext that appears in the input pod file.
1139 # there may be pod commands embedded within the text so those must be
1140 # converted to html commands.
1141 #
1142 sub process_text {
1143     my($text, $escapeQuotes) = @_;
1144     my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
1145     my($podcommand, $params, $tag, $quote);
1146
1147     return if $ignore;
1148
1149     $quote  = 0;                # status of double-quote conversion
1150     $result = "";
1151     $rest = $$text;
1152
1153     if ($rest =~ /^\s+/) {      # preformatted text, no pod directives
1154         $rest =~ s/\n+\Z//;
1155         $rest =~ s#.*#
1156             my $line = $&;
1157             1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1158             $line;
1159         #eg;
1160
1161         $rest   =~ s/&/&amp;/g;
1162         $rest   =~ s/</&lt;/g;
1163         $rest   =~ s/>/&gt;/g;
1164         $rest   =~ s/"/&quot;/g;
1165
1166         # try and create links for all occurrences of perl.* within
1167         # the preformatted text.
1168         $rest =~ s{
1169                     (\s*)(perl\w+)
1170                   }{
1171                     if (defined $pages{$2}) {   # is a link
1172                         qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1173                     } elsif (defined $pages{dosify($2)}) {      # is a link
1174                         qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1175                     } else {
1176                         "$1$2";
1177                     }
1178                   }xeg;
1179 #       $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
1180         $rest =~ s{
1181                     (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1182                   }{
1183                     my $url ;
1184                     if ( $htmlfileurl ne '' ) {
1185                         # Here, we take advantage of the knowledge
1186                         # that $htmlfileurl ne '' implies $htmlroot eq ''.
1187                         # Since $htmlroot eq '', we need to prepend $htmldir
1188                         # on the fron of the link to get the absolute path
1189                         # of the link's target. We check for a leading '/'
1190                         # to avoid corrupting links that are #, file:, etc.
1191                         my $old_url = $3 ;
1192                         $old_url = "$htmldir$old_url"
1193                             if ( $old_url =~ m{^\/} ) ;
1194                         $url = relativize_url( "$old_url.html", $htmlfileurl );
1195 # print( "  a: [$old_url.html,$htmlfileurl,$url]\n" ) ;
1196                     }
1197                     else {
1198                         $url = "$3.html" ;
1199                     }
1200                     "$1$url" ;
1201                   }xeg;
1202
1203   # Look for embedded URLs and make them in to links.  We don't
1204   # relativize them since they are best left as the author intended.
1205   my $urls = '(' . join ('|', qw{
1206                 http
1207                 telnet
1208                 mailto
1209                 news
1210                 gopher
1211                 file
1212                 wais
1213                 ftp
1214             } )
1215         . ')';
1216
1217   my $ltrs = '\w';
1218   my $gunk = '/#~:.?+=&%@!\-';
1219   my $punc = '.:?\-';
1220   my $any  = "${ltrs}${gunk}${punc}";
1221
1222   $rest =~ s{
1223         \b                          # start at word boundary
1224         (                           # begin $1  {
1225           $urls     :               # need resource and a colon
1226           (?!:)                     # Ignore File::, among others.
1227           [$any] +?                 # followed by on or more
1228                                     #  of any valid character, but
1229                                     #  be conservative and take only
1230                                     #  what you need to....
1231         )                           # end   $1  }
1232         (?=                         # look-ahead non-consumptive assertion
1233                 [$punc]*            # either 0 or more puntuation
1234                 [^$any]             #   followed by a non-url char
1235             |                       # or else
1236                 $                   #   then end of the string
1237         )
1238       }{<A HREF="$1">$1</A>}igox;
1239
1240         $result =   "<PRE>"     # text should be as it is (verbatim)
1241                   . "$rest\n"
1242                   . "</PRE>\n";
1243     } else {                    # formatted text
1244         # parse through the string, stopping each time we find a
1245         # pod-escape.  once the string has been throughly processed
1246         # we can output it.
1247         while (length $rest) {
1248             # check to see if there are any possible pod directives in
1249             # the remaining part of the text.
1250             if ($rest =~ m/[BCEIFLSZ]</) {
1251                 warn "\$rest\t= $rest\n" unless
1252                     $rest =~ /\A
1253                            ([^<]*?)
1254                            ([BCEIFLSZ]?)
1255                            <
1256                            (.*)\Z/xs;
1257
1258                 $s1 = $1;       # pure text
1259                 $s2 = $2;       # the type of pod-escape that follows
1260                 $s3 = '<';      # '<'
1261                 $s4 = $3;       # the rest of the string
1262             } else {
1263                 $s1 = $rest;
1264                 $s2 = "";
1265                 $s3 = "";
1266                 $s4 = "";
1267             }
1268
1269             if ($s3 eq '<' && $s2) {    # a pod-escape
1270                 $result    .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
1271                 $podcommand = "$s2<";
1272                 $rest       = $s4;
1273
1274                 # find the matching '>'
1275                 $match = 1;
1276                 $bf = 0;
1277                 while ($match && !$bf) {
1278                     $bf = 1;
1279                     if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
1280                         $bf = 0;
1281                         $match++;
1282                         $podcommand .= $1;
1283                         $rest        = $2;
1284                     } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
1285                         $bf = 0;
1286                         $match--;
1287                         $podcommand .= $1;
1288                         $rest        = $2;
1289                     }
1290                 }
1291
1292                 if ($match != 0) {
1293                     warn <<WARN;
1294 $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
1295 WARN
1296                     $result .= substr $podcommand, 0, 2;
1297                     $rest = substr($podcommand, 2) . $rest;
1298                     next;
1299                 }
1300
1301                 # pull out the parameters to the pod-escape
1302                 $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
1303                 $tag    = $1;
1304                 $params = $2;
1305
1306                 # process the text within the pod-escape so that any escapes
1307                 # which must occur do.
1308                 process_text(\$params, 0) unless $tag eq 'L';
1309
1310                 $s1 = $params;
1311                 if (!$tag || $tag eq " ") {     #  <> : no tag
1312                     $s1 = "&lt;$params&gt;";
1313                 } elsif ($tag eq "L") {         # L<> : link
1314                     $s1 = process_L($params);
1315                 } elsif ($tag eq "I" ||         # I<> : italicize text
1316                          $tag eq "B" ||         # B<> : bold text
1317                          $tag eq "F") {         # F<> : file specification
1318                     $s1 = process_BFI($tag, $params);
1319                 } elsif ($tag eq "C") {         # C<> : literal code
1320                     $s1 = process_C($params, 1);
1321                 } elsif ($tag eq "E") {         # E<> : escape
1322                     $s1 = process_E($params);
1323                 } elsif ($tag eq "Z") {         # Z<> : zero-width character
1324                     $s1 = process_Z($params);
1325                 } elsif ($tag eq "S") {         # S<> : non-breaking space
1326                     $s1 = process_S($params);
1327                 } elsif ($tag eq "X") {         # S<> : non-breaking space
1328                     $s1 = process_X($params);
1329                 } else {
1330                     warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
1331                 }
1332
1333                 $result .= "$s1";
1334             } else {
1335                 # for pure text we must deal with implicit links and
1336                 # double-quotes among other things.
1337                 $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
1338                 $rest    = $s4;
1339             }
1340         }
1341     }
1342     $$text = $result;
1343 }
1344
1345 sub html_escape {
1346     my $rest = $_[0];
1347     $rest   =~ s/&(?!\w+;|#)/&amp;/g;   # XXX not bulletproof
1348     $rest   =~ s/</&lt;/g;
1349     $rest   =~ s/>/&gt;/g;
1350     $rest   =~ s/"/&quot;/g;
1351     return $rest;
1352 }
1353
1354 #
1355 # process_puretext - process pure text (without pod-escapes) converting
1356 #  double-quotes and handling implicit C<> links.
1357 #
1358 sub process_puretext {
1359     my($text, $quote) = @_;
1360     my(@words, $result, $rest, $lead, $trail);
1361
1362     # convert double-quotes to single-quotes
1363     $text =~ s/\A([^"]*)"/$1''/s if $$quote;
1364     while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
1365
1366     $$quote = ($text =~ m/"/ ? 1 : 0);
1367     $text =~ s/\A([^"]*)"/$1``/s if $$quote;
1368
1369     # keep track of leading and trailing white-space
1370     $lead  = ($text =~ /\A(\s*)/s ? $1 : "");
1371     $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
1372
1373     # collapse all white space into a single space
1374     $text =~ s/\s+/ /g;
1375     @words = split(" ", $text);
1376
1377     # process each word individually
1378     foreach my $word (@words) {
1379         # see if we can infer a link
1380         if ($word =~ /^\w+\(/) {
1381             # has parenthesis so should have been a C<> ref
1382             $word = process_C($word);
1383 #           $word =~ /^[^()]*]\(/;
1384 #           if (defined $items{$1} && $items{$1}) {
1385 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
1386 #                       . htmlify(0,$word)
1387 #                       . "\">$word</A></CODE>";
1388 #           } elsif (defined $items{$word} && $items{$word}) {
1389 #               $word =   "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
1390 #                       . htmlify(0,$word)
1391 #                       . "\">$word</A></CODE>";
1392 #           } else {
1393 #               $word =   "\n<CODE><A HREF=\"#item_"
1394 #                       . htmlify(0,$word)
1395 #                       . "\">$word</A></CODE>";
1396 #           }
1397         } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
1398             # perl variables, should be a C<> ref
1399             $word = process_C($word, 1);
1400         } elsif ($word =~ m,^\w+://\w,) {
1401             # looks like a URL
1402             # Don't relativize it: leave it as the author intended
1403             $word = qq(<A HREF="$word">$word</A>);
1404         } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1405             # looks like an e-mail address
1406             my ($w1, $w2, $w3) = ("", $word, "");
1407             ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1408             ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1409             $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
1410         } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) {  # all uppercase?
1411             $word = html_escape($word) if $word =~ /["&<>]/;
1412             $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1413         } else {
1414             $word = html_escape($word) if $word =~ /["&<>]/;
1415         }
1416     }
1417
1418     # build a new string based upon our conversion
1419     $result = "";
1420     $rest   = join(" ", @words);
1421     while (length($rest) > 75) {
1422         if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
1423              $rest =~ m/^(\S*)\s(.*?)$/o) {
1424
1425             $result .= "$1\n";
1426             $rest    = $2;
1427         } else {
1428             $result .= "$rest\n";
1429             $rest    = "";
1430         }
1431     }
1432     $result .= $rest if $rest;
1433
1434     # restore the leading and trailing white-space
1435     $result = "$lead$result$trail";
1436
1437     return $result;
1438 }
1439
1440 #
1441 # pre_escape - convert & in text to $amp;
1442 #
1443 sub pre_escape {
1444     my($str) = @_;
1445     $$str =~ s/&(?!\w+;|#)/&amp;/g;     # XXX not bulletproof
1446 }
1447
1448 #
1449 # dosify - convert filenames to 8.3
1450 #
1451 sub dosify {
1452     my($str) = @_;
1453     return lc($str) if $^O eq 'VMS';     # VMS just needs casing
1454     if ($Is83) {
1455         $str = lc $str;
1456         $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1457         $str =~ s/(\w+)/substr ($1,0,8)/ge;
1458     }
1459     return $str;
1460 }
1461
1462 #
1463 # process_L - convert a pod L<> directive to a corresponding HTML link.
1464 #  most of the links made are inferred rather than known about directly
1465 #  (i.e it's not known whether the =head\d section exists in the target file,
1466 #   or whether a .pod file exists in the case of split files).  however, the
1467 #  guessing usually works.
1468 #
1469 # Unlike the other directives, this should be called with an unprocessed
1470 # string, else tags in the link won't be matched.
1471 #
1472 sub process_L {
1473     my($str) = @_;
1474     my($s1, $s2, $linktext, $page, $page83, $section, $link);   # work strings
1475
1476     $str =~ s/\n/ /g;                   # undo word-wrapped tags
1477     $s1 = $str;
1478     for ($s1) {
1479         # LREF: a la HREF L<show this text|man/section>
1480         $linktext = $1 if s:^([^|]+)\|::;
1481
1482         # make sure sections start with a /
1483         s,^",/",g;
1484         s,^,/,g if (!m,/, && / /);
1485
1486         # check if there's a section specified
1487         if (m,^(.*?)/"?(.*?)"?$,) {     # yes
1488             ($page, $section) = ($1, $2);
1489         } else {                        # no
1490             ($page, $section) = ($str, "");
1491         }
1492
1493         # check if we know that this is a section in this page
1494         if (!defined $pages{$page} && defined $sections{$page}) {
1495             $section = $page;
1496             $page = "";
1497         }
1498
1499         # remove trailing punctuation, like ()
1500         $section =~ s/\W*$// ;
1501     }
1502
1503     $page83=dosify($page);
1504     $page=$page83 if (defined $pages{$page83});
1505     if ($page eq "") {
1506         $link = "#" . htmlify(0,$section);
1507         $linktext = $section unless defined($linktext);
1508     } elsif ( $page =~ /::/ ) {
1509         $linktext  = ($section ? "$section" : "$page")
1510             unless defined($linktext);
1511         $page =~ s,::,/,g;
1512         # Search page cache for an entry keyed under the html page name,
1513         # then look to see what directory that page might be in.  NOTE:
1514         # this will only find one page. A better solution might be to produce
1515         # an intermediate page that is an index to all such pages.
1516         my $page_name = $page ;
1517         $page_name =~ s,^.*/,, ;
1518         if ( defined( $pages{ $page_name } ) &&
1519              $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1520            ) {
1521             $page = $1 ;
1522         }
1523         else {
1524             # NOTE: This branch assumes that all A::B pages are located in
1525             # $htmlroot/A/B.html . This is often incorrect, since they are
1526             # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1527             # analyze the contents of %pages and figure out where any
1528             # cousins of A::B are, then assume that.  So, if A::B isn't found,
1529             # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1530             # lib/A/B.pm. This is also limited, but it's an improvement.
1531             # Maybe a hints file so that the links point to the correct places
1532             # non-theless?
1533             # Also, maybe put a warn "$0: cannot resolve..." here.
1534         }
1535         $link = "$htmlroot/$page.html";
1536         $link .= "#" . htmlify(0,$section) if ($section);
1537     } elsif (!defined $pages{$page}) {
1538         warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n" unless $quiet;
1539         $link = "";
1540         $linktext = $page unless defined($linktext);
1541     } else {
1542         $linktext  = ($section ? "$section" : "the $page manpage") unless defined($linktext);
1543         $section = htmlify(0,$section) if $section ne "";
1544
1545         # if there is a directory by the name of the page, then assume that an
1546         # appropriate section will exist in the subdirectory
1547 #       if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1548         if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
1549             $link = "$htmlroot/$1/$section.html";
1550
1551         # since there is no directory by the name of the page, the section will
1552         # have to exist within a .html of the same name.  thus, make sure there
1553         # is a .pod or .pm that might become that .html
1554         } else {
1555             $section = "#$section";
1556             # check if there is a .pod with the page name
1557             if ($pages{$page} =~ /([^:]*)\.pod:/) {
1558                 $link = "$htmlroot/$1.html$section";
1559             } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1560                 $link = "$htmlroot/$1.html$section";
1561             } else {
1562                 warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
1563                              "no .pod or .pm found\n";
1564                 $link = "";
1565                 $linktext = $section unless defined($linktext);
1566             }
1567         }
1568     }
1569
1570     process_text(\$linktext, 0);
1571     if ($link) {
1572         # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1573         # implies $htmlroot eq ''. This means that the link in question
1574         # needs a prefix of $htmldir if it begins with '/'. The test for
1575         # the initial '/' is done to avoid '#'-only links, and to allow
1576         # for other kinds of links, like file:, ftp:, etc.
1577         my $url ;
1578         if (  $htmlfileurl ne '' ) {
1579             $link = "$htmldir$link"
1580                 if ( $link =~ m{^/} ) ;
1581
1582             $url = relativize_url( $link, $htmlfileurl ) ;
1583 # print( "  b: [$link,$htmlfileurl,$url]\n" ) ;
1584         }
1585         else {
1586             $url = $link ;
1587         }
1588
1589         $s1 = "<A HREF=\"$url\">$linktext</A>";
1590     } else {
1591         $s1 = "<EM>$linktext</EM>";
1592     }
1593     return $s1;
1594 }
1595
1596 #
1597 # relativize_url - convert an absolute URL to one relative to a base URL.
1598 # Assumes both end in a filename.
1599 #
1600 sub relativize_url {
1601     my ($dest,$source) = @_ ;
1602
1603     my ($dest_volume,$dest_directory,$dest_file) =
1604         File::Spec::Unix->splitpath( $dest ) ;
1605     $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1606
1607     my ($source_volume,$source_directory,$source_file) =
1608         File::Spec::Unix->splitpath( $source ) ;
1609     $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1610
1611     my $rel_path = '' ;
1612     if ( $dest ne '' ) {
1613        $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1614     }
1615
1616     if ( $rel_path ne ''                &&
1617          substr( $rel_path, -1 ) ne '/' &&
1618          substr( $dest_file, 0, 1 ) ne '#'
1619         ) {
1620         $rel_path .= "/$dest_file" ;
1621     }
1622     else {
1623         $rel_path .= "$dest_file" ;
1624     }
1625
1626     return $rel_path ;
1627 }
1628
1629 #
1630 # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
1631 # convert them to corresponding HTML directives.
1632 #
1633 sub process_BFI {
1634     my($tag, $str) = @_;
1635     my($s1);                    # work string
1636     my(%repltext) = (   'B' => 'STRONG',
1637                         'F' => 'EM',
1638                         'I' => 'EM');
1639
1640     # extract the modified text and convert to HTML
1641     $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
1642     return $s1;
1643 }
1644
1645 #
1646 # process_C - process the C<> pod-escape.
1647 #
1648 sub process_C {
1649     my($str, $doref) = @_;
1650     my($s1, $s2);
1651
1652     $s1 = $str;
1653     $s1 =~ s/\([^()]*\)//g;     # delete parentheses
1654     $s2 = $s1;
1655     $s1 =~ s/\W//g;             # delete bogus characters
1656     $str = html_escape($str);
1657
1658     # if there was a pod file that we found earlier with an appropriate
1659     # =item directive, then create a link to that page.
1660     if ($doref && defined $items{$s1}) {
1661         if ( $items{$s1} ) {
1662             my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ;
1663             # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1664             # implies $htmlroot eq ''.
1665             my $url ;
1666             if (  $htmlfileurl ne '' ) {
1667                 $link = "$htmldir$link" ;
1668                 $url = relativize_url( $link, $htmlfileurl ) ;
1669             }
1670             else {
1671                 $url = $link ;
1672             }
1673             $s1 = "<A HREF=\"$url\">$str</A>" ;
1674         }
1675         else {
1676             $s1 = "<A HREF=\"#item_" . htmlify(0,$s2) .  "\">$str</A>" ;
1677         }
1678         $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
1679         confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
1680     } else {
1681         $s1 = "<CODE>$str</CODE>";
1682         # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
1683     }
1684
1685
1686     return $s1;
1687 }
1688
1689 #
1690 # process_E - process the E<> pod directive which seems to escape a character.
1691 #
1692 sub process_E {
1693     my($str) = @_;
1694
1695     for ($str) {
1696         s,([^/].*),\&$1\;,g;
1697     }
1698
1699     return $str;
1700 }
1701
1702 #
1703 # process_Z - process the Z<> pod directive which really just amounts to
1704 # ignoring it.  this allows someone to start a paragraph with an =
1705 #
1706 sub process_Z {
1707     my($str) = @_;
1708
1709     # there is no equivalent in HTML for this so just ignore it.
1710     $str = "";
1711     return $str;
1712 }
1713
1714 #
1715 # process_S - process the S<> pod directive which means to convert all
1716 # spaces in the string to non-breaking spaces (in HTML-eze).
1717 #
1718 sub process_S {
1719     my($str) = @_;
1720
1721     # convert all spaces in the text to non-breaking spaces in HTML.
1722     $str =~ s/ /&nbsp;/g;
1723     return $str;
1724 }
1725
1726 #
1727 # process_X - this is supposed to make an index entry.  we'll just
1728 # ignore it.
1729 #
1730 sub process_X {
1731     return '';
1732 }
1733
1734
1735 #
1736 # Adapted from Nick Ing-Simmons' PodToHtml package.
1737 sub relative_url {
1738     my $source_file = shift ;
1739     my $destination_file = shift;
1740
1741     my $source = URI::file->new_abs($source_file);
1742     my $uo = URI::file->new($destination_file,$source)->abs;
1743     return $uo->rel->as_string;
1744 }
1745
1746
1747 #
1748 # finish_list - finish off any pending HTML lists.  this should be called
1749 # after the entire pod file has been read and converted.
1750 #
1751 sub finish_list {
1752     while ($listlevel > 0) {
1753         print HTML "</DL>\n";
1754         $listlevel--;
1755     }
1756 }
1757
1758 #
1759 # htmlify - converts a pod section specification to a suitable section
1760 # specification for HTML.  if first arg is 1, only takes 1st word.
1761 #
1762 sub htmlify {
1763     my($compact, $heading) = @_;
1764
1765     if ($compact) {
1766       $heading =~ /^(\w+)/;
1767       $heading = $1;
1768     }
1769
1770   # $heading = lc($heading);
1771   $heading =~ s/[^\w\s]/_/g;
1772   $heading =~ s/(\s+)/ /g;
1773   $heading =~ s/^\s*(.*?)\s*$/$1/s;
1774   $heading =~ s/ /_/g;
1775   $heading =~ s/\A(.{32}).*\Z/$1/s;
1776   $heading =~ s/\s+\Z//;
1777   $heading =~ s/_{2,}/_/g;
1778
1779   return $heading;
1780 }
1781
1782 BEGIN {
1783 }
1784
1785 1;