lib/Pod/Html.pm

   1 package Pod::Html;
   2 use strict;
   3 require Exporter;
   4
   5 use vars qw($VERSION @ISA @EXPORT);
   6 $VERSION = 1.04;
   7 @ISA = qw(Exporter);
   8 @EXPORT = qw(pod2html htmlify);
   9
  10 use Carp;
  11 use Config;
  12 use Cwd;
  13 use File::Spec;
  14 use File::Spec::Unix;
  15 use Getopt::Long;
  16
  17 use locale;     # make \w work right in non-ASCII lands
  18
  19 =head1 NAME
  20
  21 Pod::Html - module to convert pod files to HTML
  22
  23 =head1 SYNOPSIS
  24
  25     use Pod::Html;
  26     pod2html([options]);
  27
  28 =head1 DESCRIPTION
  29
  30 Converts files from pod format (see L<perlpod>) to HTML format.  It
  31 can automatically generate indexes and cross-references, and it keeps
  32 a cache of things it knows how to cross-reference.
  33
  34 =head1 ARGUMENTS
  35
  36 Pod::Html takes the following arguments:
  37
  38 =over 4
  39
  40 =item backlink
  41
  42     --backlink="Back to Top"
  43
  44 Adds "Back to Top" links in front of every C<head1> heading (except for
  45 the first).  By default, no backlinks are generated.
  46
  47 =item cachedir
  48
  49     --cachedir=name
  50
  51 Creates the item and directory caches in the given directory.
  52
  53 =item css
  54
  55     --css=stylesheet
  56
  57 Specify the URL of a cascading style sheet.  Also disables all HTML/CSS
  58 C<style> attributes that are output by default (to avoid conflicts).
  59
  60 =item flush
  61
  62     --flush
  63
  64 Flushes the item and directory caches.
  65
  66 =item header
  67
  68     --header
  69     --noheader
  70
  71 Creates header and footer blocks containing the text of the C<NAME>
  72 section.  By default, no headers are generated.
  73
  74 =item help
  75
  76     --help
  77
  78 Displays the usage message.
  79
  80 =item htmldir
  81
  82     --htmldir=name
  83
  84 Sets the directory in which the resulting HTML file is placed.  This
  85 is used to generate relative links to other files. Not passing this
  86 causes all links to be absolute, since this is the value that tells
  87 Pod::Html the root of the documentation tree.
  88
  89 =item htmlroot
  90
  91     --htmlroot=name
  92
  93 Sets the base URL for the HTML files.  When cross-references are made,
  94 the HTML root is prepended to the URL.
  95
  96 =item index
  97
  98     --index
  99     --noindex
 100
 101 Generate an index at the top of the HTML file.  This is the default
 102 behaviour.
 103
 104 =item infile
 105
 106     --infile=name
 107
 108 Specify the pod file to convert.  Input is taken from STDIN if no
 109 infile is specified.
 110
 111 =item libpods
 112
 113     --libpods=name:...:name
 114
 115 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
 116
 117 =item netscape
 118
 119     --netscape
 120     --nonetscape
 121
 122 B<Deprecated>, has no effect. For backwards compatibility only.
 123
 124 =item outfile
 125
 126     --outfile=name
 127
 128 Specify the HTML file to create.  Output goes to STDOUT if no outfile
 129 is specified.
 130
 131 =item podpath
 132
 133     --podpath=name:...:name
 134
 135 Specify which subdirectories of the podroot contain pod files whose
 136 HTML converted forms can be linked to in cross references.
 137
 138 =item podroot
 139
 140     --podroot=name
 141
 142 Specify the base directory for finding library pods.
 143
 144 =item quiet
 145
 146     --quiet
 147     --noquiet
 148
 149 Don't display I<mostly harmless> warning messages.  These messages
 150 will be displayed by default.  But this is not the same as C<verbose>
 151 mode.
 152
 153 =item recurse
 154
 155     --recurse
 156     --norecurse
 157
 158 Recurse into subdirectories specified in podpath (default behaviour).
 159
 160 =item title
 161
 162     --title=title
 163
 164 Specify the title of the resulting HTML file.
 165
 166 =item verbose
 167
 168     --verbose
 169     --noverbose
 170
 171 Display progress messages.  By default, they won't be displayed.
 172
 173 =back
 174
 175 =head1 EXAMPLE
 176
 177     pod2html("pod2html",
 178              "--podpath=lib:ext:pod:vms",
 179              "--podroot=/usr/src/perl",
 180              "--htmlroot=/perl/nmanual",
 181              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
 182              "--recurse",
 183              "--infile=foo.pod",
 184              "--outfile=/perl/nmanual/foo.html");
 185
 186 =head1 ENVIRONMENT
 187
 188 Uses C<$Config{pod2html}> to setup default options.
 189
 190 =head1 AUTHOR
 191
 192 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 193
 194 =head1 SEE ALSO
 195
 196 L<perlpod>
 197
 198 =head1 COPYRIGHT
 199
 200 This program is distributed under the Artistic License.
 201
 202 =cut
 203
 204 my $cachedir = ".";             # The directory to which item and directory
 205                                 # caches will be written.
 206 my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
 207 my $dircache = "pod2htmd$cache_ext";
 208 my $itemcache = "pod2htmi$cache_ext";
 209
 210 my @begin_stack = ();           # begin/end stack
 211
 212 my @libpods = ();               # files to search for links from C<> directives
 213 my $htmlroot = "/";             # http-server base directory from which all
 214                                 #   relative paths in $podpath stem.
 215 my $htmldir = "";               # The directory to which the html pages
 216                                 # will (eventually) be written.
 217 my $htmlfile = "";              # write to stdout by default
 218 my $htmlfileurl = "" ;          # The url that other files would use to
 219                                 # refer to this file.  This is only used
 220                                 # to make relative urls that point to
 221                                 # other files.
 222 my $podfile = "";               # read from stdin by default
 223 my @podpath = ();               # list of directories containing library pods.
 224 my $podroot = File::Spec->curdir;               # filesystem base directory from which all
 225                                 #   relative paths in $podpath stem.
 226 my $css = '';                   # Cascading style sheet
 227 my $recurse = 1;                # recurse on subdirectories in $podpath.
 228 my $quiet = 0;                  # not quiet by default
 229 my $verbose = 0;                # not verbose by default
 230 my $doindex = 1;                # non-zero if we should generate an index
 231 my $backlink = '';              # text for "back to top" links
 232 my $listlevel = 0;              # current list depth
 233 my @listend = ();               # the text to use to end the list.
 234 my $after_lpar = 0;             # set to true after a par in an =item
 235 my $ignore = 1;                 # whether or not to format text.  we don't
 236                                 #   format text until we hit our first pod
 237                                 #   directive.
 238
 239 my %items_named = ();           # for the multiples of the same item in perlfunc
 240 my @items_seen = ();
 241 my $title;                      # title to give the pod(s)
 242 my $header = 0;                 # produce block header/footer
 243 my $top = 1;                    # true if we are at the top of the doc.  used
 244                                 #   to prevent the first <hr /> directive.
 245 my $paragraph;                  # which paragraph we're processing (used
 246                                 #   for error messages)
 247 my $ptQuote = 0;                # status of double-quote conversion
 248 my %pages = ();                 # associative array used to find the location
 249                                 #   of pages referenced by L<> links.
 250 my %sections = ();              # sections within this page
 251 my %items = ();                 # associative array used to find the location
 252                                 #   of =item directives referenced by C<> links
 253 my %local_items = ();           # local items - avoid destruction of %items
 254 my $Is83;                       # is dos with short filenames (8.3)
 255
 256 sub init_globals {
 257 $dircache = "pod2htmd$cache_ext";
 258 $itemcache = "pod2htmi$cache_ext";
 259
 260 @begin_stack = ();              # begin/end stack
 261
 262 @libpods = ();          # files to search for links from C<> directives
 263 $htmlroot = "/";                # http-server base directory from which all
 264                                 #   relative paths in $podpath stem.
 265 $htmldir = "";          # The directory to which the html pages
 266                                 # will (eventually) be written.
 267 $htmlfile = "";         # write to stdout by default
 268 $podfile = "";          # read from stdin by default
 269 @podpath = ();          # list of directories containing library pods.
 270 $podroot = File::Spec->curdir;          # filesystem base directory from which all
 271                                 #   relative paths in $podpath stem.
 272 $css = '';                   # Cascading style sheet
 273 $recurse = 1;           # recurse on subdirectories in $podpath.
 274 $quiet = 0;             # not quiet by default
 275 $verbose = 0;           # not verbose by default
 276 $doindex = 1;                   # non-zero if we should generate an index
 277 $backlink = '';         # text for "back to top" links
 278 $listlevel = 0;         # current list depth
 279 @listend = ();          # the text to use to end the list.
 280 $after_lpar = 0;        # set to true after a par in an =item
 281 $ignore = 1;                    # whether or not to format text.  we don't
 282                                 #   format text until we hit our first pod
 283                                 #   directive.
 284
 285 @items_seen = ();
 286 %items_named = ();
 287 $header = 0;                    # produce block header/footer
 288 $title = '';                    # title to give the pod(s)
 289 $top = 1;                       # true if we are at the top of the doc.  used
 290                                 #   to prevent the first <hr /> directive.
 291 $paragraph = '';                        # which paragraph we're processing (used
 292                                 #   for error messages)
 293 %sections = ();         # sections within this page
 294
 295 # These are not reinitialised here but are kept as a cache.
 296 # See get_cache and related cache management code.
 297 #%pages = ();                   # associative array used to find the location
 298                                 #   of pages referenced by L<> links.
 299 #%items = ();                   # associative array used to find the location
 300                                 #   of =item directives referenced by C<> links
 301 %local_items = ();
 302 $Is83=$^O eq 'dos';
 303 }
 304
 305 #
 306 # clean_data: global clean-up of pod data
 307 #
 308 sub clean_data($){
 309     my( $dataref ) = @_;
 310     for my $i ( 0..$#{$dataref} ) {
 311         ${$dataref}[$i] =~ s/\s+\Z//;
 312
 313         # have a look for all-space lines
 314       if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
 315             my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
 316             splice( @$dataref, $i, 1, @chunks );
 317         }
 318     }
 319 }
 320
 321
 322 sub pod2html {
 323     local(@ARGV) = @_;
 324     local($/);
 325     local $_;
 326
 327     init_globals();
 328
 329     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 330
 331     # cache of %pages and %items from last time we ran pod2html
 332
 333     #undef $opt_help if defined $opt_help;
 334
 335     # parse the command-line parameters
 336     parse_command_line();
 337
 338     # escape the backlink argument (same goes for title but is done later...)
 339     $backlink = html_escape($backlink) if defined $backlink;
 340
 341     # set some variables to their default values if necessary
 342     local *POD;
 343     unless (@ARGV && $ARGV[0]) {
 344         $podfile  = "-" unless $podfile;        # stdin
 345         open(POD, "<$podfile")
 346                 || die "$0: cannot open $podfile file for input: $!\n";
 347     } else {
 348         $podfile = $ARGV[0];  # XXX: might be more filenames
 349         *POD = *ARGV;
 350     }
 351     $htmlfile = "-" unless $htmlfile;   # stdout
 352     $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
 353     $htmldir =~ s#/\z## ;               # so we don't get a //
 354     if (  $htmlroot eq ''
 355        && defined( $htmldir )
 356        && $htmldir ne ''
 357        && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
 358        )
 359     {
 360         # Set the 'base' url for this file, so that we can use it
 361         # as the location from which to calculate relative links
 362         # to other files. If this is '', then absolute links will
 363         # be used throughout.
 364         $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
 365     }
 366
 367     # read the pod a paragraph at a time
 368     warn "Scanning for sections in input file(s)\n" if $verbose;
 369     $/ = "";
 370     my @poddata  = <POD>;
 371     close(POD);
 372     clean_data( \@poddata );
 373
 374     # scan the pod for =head[1-6] directives and build an index
 375     my $index = scan_headings(\%sections, @poddata);
 376
 377     unless($index) {
 378         warn "No headings in $podfile\n" if $verbose;
 379     }
 380
 381     # open the output file
 382     open(HTML, ">$htmlfile")
 383             || die "$0: cannot open $htmlfile file for output: $!\n";
 384
 385     # put a title in the HTML file if one wasn't specified
 386     if ($title eq '') {
 387         TITLE_SEARCH: {
 388             for (my $i = 0; $i < @poddata; $i++) {
 389                 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 390                     for my $para ( @poddata[$i, $i+1] ) {
 391                         last TITLE_SEARCH
 392                             if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
 393                     }
 394                 }
 395
 396             }
 397         }
 398     }
 399     if (!$title and $podfile =~ /\.pod\z/) {
 400         # probably a split pod so take first =head[12] as title
 401         for (my $i = 0; $i < @poddata; $i++) {
 402             last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 403         }
 404         warn "adopted '$title' as title for $podfile\n"
 405             if $verbose and $title;
 406     }
 407     if ($title) {
 408         $title =~ s/\s*\(.*\)//;
 409     } else {
 410         warn "$0: no title for $podfile" unless $quiet;
 411         $podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
 412         $title = ($podfile eq "-" ? 'No Title' : $1);
 413         warn "using $title" if $verbose;
 414     }
 415     $title = html_escape($title);
 416
 417     my $csslink = '';
 418     my $bodystyle = ' style="background-color: white"';
 419     my $tdstyle = ' style="background-color: #cccccc"';
 420
 421     if ($css) {
 422       $csslink = qq(\n<link rel="stylesheet" href="$css" type="text/css" />);
 423       $csslink =~ s,\\,/,g;
 424       $csslink =~ s,(/.):,$1|,;
 425       $bodystyle = '';
 426       $tdstyle = '';
 427     }
 428
 429       my $block = $header ? <<END_OF_BLOCK : '';
 430 <table border="0" width="100%" cellspacing="0" cellpadding="3">
 431 <tr><td class="block"$tdstyle valign="middle">
 432 <big><strong><span class="block">&nbsp;$title</span></strong></big>
 433 </td></tr>
 434 </table>
 435 END_OF_BLOCK
 436
 437     print HTML <<END_OF_HEAD;
 438 <?xml version="1.0" encoding="UTF-8"?>
 439 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 440 <html xmlns="http://www.w3.org/1999/xhtml">
 441 head>
 442 <title>$title</title>$csslink
 443 <link rev="made" href="mailto:$Config{perladmin}" />
 444 </head>
 445
 446 <body$bodystyle>
 447 $block
 448 END_OF_HEAD
 449
 450     # load/reload/validate/cache %pages and %items
 451     get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
 452
 453     # scan the pod for =item directives
 454     scan_items( \%local_items, "", @poddata);
 455
 456     # put an index at the top of the file.  note, if $doindex is 0 we
 457     # still generate an index, but surround it with an html comment.
 458     # that way some other program can extract it if desired.
 459     $index =~ s/--+/-/g;
 460     print HTML "<p><a name=\"__index__\"></a></p>\n";
 461     print HTML "<!-- INDEX BEGIN -->\n";
 462     print HTML "<!--\n" unless $doindex;
 463     print HTML $index;
 464     print HTML "-->\n" unless $doindex;
 465     print HTML "<!-- INDEX END -->\n\n";
 466     print HTML "<hr />\n" if $doindex and $index;
 467
 468     # now convert this file
 469     my $after_item;             # set to true after an =item
 470     my $need_dd = 0;
 471     warn "Converting input file $podfile\n" if $verbose;
 472     foreach my $i (0..$#poddata){
 473         $ptQuote = 0; # status of quote conversion
 474
 475         $_ = $poddata[$i];
 476         $paragraph = $i+1;
 477         if (/^(=.*)/s) {        # is it a pod directive?
 478             $ignore = 0;
 479             $after_item = 0;
 480             $need_dd = 0;
 481             $_ = $1;
 482             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 483                 process_begin($1, $2);
 484             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 485                 process_end($1, $2);
 486             } elsif (/^=cut/) {                 # =cut
 487                 process_cut();
 488             } elsif (/^=pod/) {                 # =pod
 489                 process_pod();
 490             } else {
 491                 next if @begin_stack && $begin_stack[-1] ne 'html';
 492
 493                 if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
 494                     process_head( $1, $2, $doindex && $index );
 495                 } elsif (/^=item\s*(.*\S)?/sm) {        # =item text
 496                     warn "$0: $podfile: =item without bullet, number or text"
 497                        . " in paragraph $paragraph.\n" if !defined($1) or $1 eq '';
 498                     $need_dd = process_item( $1 );
 499                     $after_item = 1;
 500                 } elsif (/^=over\s*(.*)/) {             # =over N
 501                     process_over();
 502                 } elsif (/^=back/) {            # =back
 503                     process_back();
 504                 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
 505                     process_for($1,$2);
 506                 } else {
 507                     /^=(\S*)\s*/;
 508                     warn "$0: $podfile: unknown pod directive '$1' in "
 509                        . "paragraph $paragraph.  ignoring.\n";
 510                 }
 511             }
 512             $top = 0;
 513         }
 514         else {
 515             next if $ignore;
 516             next if @begin_stack && $begin_stack[-1] ne 'html';
 517             print HTML and next if @begin_stack && $begin_stack[-1] eq 'html';
 518             print HTML "<dd>\n" if $need_dd;
 519             my $text = $_;
 520             if( $text =~ /\A\s+/ ){
 521                 process_pre( \$text );
 522                 print HTML "<pre>\n$text</pre>\n";
 523
 524             } else {
 525                 process_text( \$text );
 526
 527                 # experimental: check for a paragraph where all lines
 528                 # have some ...\t...\t...\n pattern
 529                 if( $text =~ /\t/ ){
 530                     my @lines = split( "\n", $text );
 531                     if( @lines > 1 ){
 532                         my $all = 2;
 533                         foreach my $line ( @lines ){
 534                             if( $line =~ /\S/ && $line !~ /\t/ ){
 535                                 $all--;
 536                                 last if $all == 0;
 537                             }
 538                         }
 539                         if( $all > 0 ){
 540                             $text =~ s/\t+/<td>/g;
 541                             $text =~ s/^/<tr><td>/gm;
 542                             $text = '<table cellspacing="0" cellpadding="0">' .
 543                                     $text . '</table>';
 544                         }
 545                     }
 546                 }
 547                 ## end of experimental
 548
 549                 if( $after_item ){
 550                     print HTML "$text\n";
 551                     $after_lpar = 1;
 552                 } else {
 553                     print HTML "<p>$text</p>\n";
 554                 }
 555             }
 556             print HTML "</dd>\n" if $need_dd;
 557             $after_item = 0;
 558         }
 559     }
 560
 561     # finish off any pending directives
 562     finish_list();
 563
 564     # link to page index
 565     print HTML "<p><a href=\"#__index__\"><small>$backlink</small></a></p>\n"
 566         if $doindex and $index and $backlink;
 567
 568     print HTML <<END_OF_TAIL;
 569 $block
 570 </body>
 571
 572 </html>
 573 END_OF_TAIL
 574
 575     # close the html file
 576     close(HTML);
 577
 578     warn "Finished\n" if $verbose;
 579 }
 580
 581 ##############################################################################
 582
 583 my $usage;                      # see below
 584 sub usage {
 585     my $podfile = shift;
 586     warn "$0: $podfile: @_\n" if @_;
 587     die $usage;
 588 }
 589
 590 $usage =<<END_OF_USAGE;
 591 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 592            --podpath=<name>:...:<name> --podroot=<name>
 593            --libpods=<name>:...:<name> --recurse --verbose --index
 594            --netscape --norecurse --noindex --cachedir=<name>
 595
 596   --backlink     - set text for "back to top" links (default: none).
 597   --cachedir     - directory for the item and directory cache files.
 598   --css          - stylesheet URL
 599   --flush        - flushes the item and directory caches.
 600   --[no]header   - produce block header/footer (default is no headers).
 601   --help         - prints this message.
 602   --htmldir      - directory for resulting HTML files.
 603   --htmlroot     - http-server base directory from which all relative paths
 604                    in podpath stem (default is /).
 605   --[no]index    - generate an index at the top of the resulting html
 606                    (default behaviour).
 607   --infile       - filename for the pod to convert (input taken from stdin
 608                    by default).
 609   --libpods      - colon-separated list of pages to search for =item pod
 610                    directives in as targets of C<> and implicit links (empty
 611                    by default).  note, these are not filenames, but rather
 612                    page names like those that appear in L<> links.
 613   --outfile      - filename for the resulting html file (output sent to
 614                    stdout by default).
 615   --podpath      - colon-separated list of directories containing library
 616                    pods (empty by default).
 617   --podroot      - filesystem base directory from which all relative paths
 618                    in podpath stem (default is .).
 619   --[no]quiet    - supress some benign warning messages (default is off).
 620   --[no]recurse  - recurse on those subdirectories listed in podpath
 621                    (default behaviour).
 622   --title        - title that will appear in resulting html file.
 623   --[no]verbose  - self-explanatory (off by default).
 624   --[no]netscape - deprecated, has no effect. for backwards compatibility only.
 625
 626 END_OF_USAGE
 627
 628 sub parse_command_line {
 629     my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
 630         $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
 631         $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
 632         $opt_recurse,$opt_title,$opt_verbose);
 633
 634     unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
 635     my $result = GetOptions(
 636                             'backlink=s' => \$opt_backlink,
 637                             'cachedir=s' => \$opt_cachedir,
 638                             'css=s'      => \$opt_css,
 639                             'flush'      => \$opt_flush,
 640                             'header!'    => \$opt_header,
 641                             'help'       => \$opt_help,
 642                             'htmldir=s'  => \$opt_htmldir,
 643                             'htmlroot=s' => \$opt_htmlroot,
 644                             'index!'     => \$opt_index,
 645                             'infile=s'   => \$opt_infile,
 646                             'libpods=s'  => \$opt_libpods,
 647                             'netscape!'  => \$opt_netscape,
 648                             'outfile=s'  => \$opt_outfile,
 649                             'podpath=s'  => \$opt_podpath,
 650                             'podroot=s'  => \$opt_podroot,
 651                             'quiet!'     => \$opt_quiet,
 652                             'recurse!'   => \$opt_recurse,
 653                             'title=s'    => \$opt_title,
 654                             'verbose!'   => \$opt_verbose,
 655                            );
 656     usage("-", "invalid parameters") if not $result;
 657
 658     usage("-") if defined $opt_help;    # see if the user asked for help
 659     $opt_help = "";                     # just to make -w shut-up.
 660
 661     @podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 662     @libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 663
 664     $backlink = $opt_backlink if defined $opt_backlink;
 665     $cachedir = $opt_cachedir if defined $opt_cachedir;
 666     $css      = $opt_css      if defined $opt_css;
 667     $header   = $opt_header   if defined $opt_header;
 668     $htmldir  = $opt_htmldir  if defined $opt_htmldir;
 669     $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 670     $doindex  = $opt_index    if defined $opt_index;
 671     $podfile  = $opt_infile   if defined $opt_infile;
 672     $htmlfile = $opt_outfile  if defined $opt_outfile;
 673     $podroot  = $opt_podroot  if defined $opt_podroot;
 674     $quiet    = $opt_quiet    if defined $opt_quiet;
 675     $recurse  = $opt_recurse  if defined $opt_recurse;
 676     $title    = $opt_title    if defined $opt_title;
 677     $verbose  = $opt_verbose  if defined $opt_verbose;
 678
 679     warn "Flushing item and directory caches\n"
 680         if $opt_verbose && defined $opt_flush;
 681     $dircache = "$cachedir/pod2htmd$cache_ext";
 682     $itemcache = "$cachedir/pod2htmi$cache_ext";
 683     unlink($dircache, $itemcache) if defined $opt_flush;
 684 }
 685
 686
 687 my $saved_cache_key;
 688
 689 sub get_cache {
 690     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 691     my @cache_key_args = @_;
 692
 693     # A first-level cache:
 694     # Don't bother reading the cache files if they still apply
 695     # and haven't changed since we last read them.
 696
 697     my $this_cache_key = cache_key(@cache_key_args);
 698
 699     return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
 700
 701     # load the cache of %pages and %items if possible.  $tests will be
 702     # non-zero if successful.
 703     my $tests = 0;
 704     if (-f $dircache && -f $itemcache) {
 705         warn "scanning for item cache\n" if $verbose;
 706         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 707     }
 708
 709     # if we didn't succeed in loading the cache then we must (re)build
 710     #  %pages and %items.
 711     if (!$tests) {
 712         warn "scanning directories in pod-path\n" if $verbose;
 713         scan_podpath($podroot, $recurse, 0);
 714     }
 715     $saved_cache_key = cache_key(@cache_key_args);
 716 }
 717
 718 sub cache_key {
 719     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 720     return join('!', $dircache, $itemcache, $recurse,
 721         @$podpath, $podroot, stat($dircache), stat($itemcache));
 722 }
 723
 724 #
 725 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 726 #  are valid caches of %pages and %items.  if they are valid then it loads
 727 #  them and returns a non-zero value.
 728 #
 729 sub load_cache {
 730     my($dircache, $itemcache, $podpath, $podroot) = @_;
 731     my($tests);
 732     local $_;
 733
 734     $tests = 0;
 735
 736     open(CACHE, "<$itemcache") ||
 737         die "$0: error opening $itemcache for reading: $!\n";
 738     $/ = "\n";
 739
 740     # is it the same podpath?
 741     $_ = <CACHE>;
 742     chomp($_);
 743     $tests++ if (join(":", @$podpath) eq $_);
 744
 745     # is it the same podroot?
 746     $_ = <CACHE>;
 747     chomp($_);
 748     $tests++ if ($podroot eq $_);
 749
 750     # load the cache if its good
 751     if ($tests != 2) {
 752         close(CACHE);
 753         return 0;
 754     }
 755
 756     warn "loading item cache\n" if $verbose;
 757     while (<CACHE>) {
 758         /(.*?) (.*)$/;
 759         $items{$1} = $2;
 760     }
 761     close(CACHE);
 762
 763     warn "scanning for directory cache\n" if $verbose;
 764     open(CACHE, "<$dircache") ||
 765         die "$0: error opening $dircache for reading: $!\n";
 766     $/ = "\n";
 767     $tests = 0;
 768
 769     # is it the same podpath?
 770     $_ = <CACHE>;
 771     chomp($_);
 772     $tests++ if (join(":", @$podpath) eq $_);
 773
 774     # is it the same podroot?
 775     $_ = <CACHE>;
 776     chomp($_);
 777     $tests++ if ($podroot eq $_);
 778
 779     # load the cache if its good
 780     if ($tests != 2) {
 781         close(CACHE);
 782         return 0;
 783     }
 784
 785     warn "loading directory cache\n" if $verbose;
 786     while (<CACHE>) {
 787         /(.*?) (.*)$/;
 788         $pages{$1} = $2;
 789     }
 790
 791     close(CACHE);
 792
 793     return 1;
 794 }
 795
 796 #
 797 # scan_podpath - scans the directories specified in @podpath for directories,
 798 #  .pod files, and .pm files.  it also scans the pod files specified in
 799 #  @libpods for =item directives.
 800 #
 801 sub scan_podpath {
 802     my($podroot, $recurse, $append) = @_;
 803     my($pwd, $dir);
 804     my($libpod, $dirname, $pod, @files, @poddata);
 805
 806     unless($append) {
 807         %items = ();
 808         %pages = ();
 809     }
 810
 811     # scan each directory listed in @podpath
 812     $pwd = getcwd();
 813     chdir($podroot)
 814         || die "$0: error changing to directory $podroot: $!\n";
 815     foreach $dir (@podpath) {
 816         scan_dir($dir, $recurse);
 817     }
 818
 819     # scan the pods listed in @libpods for =item directives
 820     foreach $libpod (@libpods) {
 821         # if the page isn't defined then we won't know where to find it
 822         # on the system.
 823         next unless defined $pages{$libpod} && $pages{$libpod};
 824
 825         # if there is a directory then use the .pod and .pm files within it.
 826         # NOTE: Only finds the first so-named directory in the tree.
 827 #       if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 828         if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
 829             #  find all the .pod and .pm files within the directory
 830             $dirname = $1;
 831             opendir(DIR, $dirname) ||
 832                 die "$0: error opening directory $dirname: $!\n";
 833             @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
 834             closedir(DIR);
 835
 836             # scan each .pod and .pm file for =item directives
 837             foreach $pod (@files) {
 838                 open(POD, "<$dirname/$pod") ||
 839                     die "$0: error opening $dirname/$pod for input: $!\n";
 840                 @poddata = <POD>;
 841                 close(POD);
 842                 clean_data( \@poddata );
 843
 844                 scan_items( \%items, "$dirname/$pod", @poddata);
 845             }
 846
 847             # use the names of files as =item directives too.
 848 ### Don't think this should be done this way - confuses issues.(WL)
 849 ###         foreach $pod (@files) {
 850 ###             $pod =~ /^(.*)(\.pod|\.pm)$/;
 851 ###             $items{$1} = "$dirname/$1.html" if $1;
 852 ###         }
 853         } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
 854                  $pages{$libpod} =~ /([^:]*\.pm):/) {
 855             # scan the .pod or .pm file for =item directives
 856             $pod = $1;
 857             open(POD, "<$pod") ||
 858                 die "$0: error opening $pod for input: $!\n";
 859             @poddata = <POD>;
 860             close(POD);
 861             clean_data( \@poddata );
 862
 863             scan_items( \%items, "$pod", @poddata);
 864         } else {
 865             warn "$0: shouldn't be here (line ".__LINE__."\n";
 866         }
 867     }
 868     @poddata = ();      # clean-up a bit
 869
 870     chdir($pwd)
 871         || die "$0: error changing to directory $pwd: $!\n";
 872
 873     # cache the item list for later use
 874     warn "caching items for later use\n" if $verbose;
 875     open(CACHE, ">$itemcache") ||
 876         die "$0: error open $itemcache for writing: $!\n";
 877
 878     print CACHE join(":", @podpath) . "\n$podroot\n";
 879     foreach my $key (keys %items) {
 880         print CACHE "$key $items{$key}\n";
 881     }
 882
 883     close(CACHE);
 884
 885     # cache the directory list for later use
 886     warn "caching directories for later use\n" if $verbose;
 887     open(CACHE, ">$dircache") ||
 888         die "$0: error open $dircache for writing: $!\n";
 889
 890     print CACHE join(":", @podpath) . "\n$podroot\n";
 891     foreach my $key (keys %pages) {
 892         print CACHE "$key $pages{$key}\n";
 893     }
 894
 895     close(CACHE);
 896 }
 897
 898 #
 899 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 900 #  files, and .pm files.  notes those that it finds.  this information will
 901 #  be used later in order to figure out where the pages specified in L<>
 902 #  links are on the filesystem.
 903 #
 904 sub scan_dir {
 905     my($dir, $recurse) = @_;
 906     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 907     local $_;
 908
 909     @subdirs = ();
 910     @pods = ();
 911
 912     opendir(DIR, $dir) ||
 913         die "$0: error opening directory $dir: $!\n";
 914     while (defined($_ = readdir(DIR))) {
 915         if (-d "$dir/$_" && $_ ne "." && $_ ne "..") {      # directory
 916             $pages{$_}  = "" unless defined $pages{$_};
 917             $pages{$_} .= "$dir/$_:";
 918             push(@subdirs, $_);
 919         } elsif (/\.pod\z/) {                               # .pod
 920             s/\.pod\z//;
 921             $pages{$_}  = "" unless defined $pages{$_};
 922             $pages{$_} .= "$dir/$_.pod:";
 923             push(@pods, "$dir/$_.pod");
 924         } elsif (/\.html\z/) {                              # .html
 925             s/\.html\z//;
 926             $pages{$_}  = "" unless defined $pages{$_};
 927             $pages{$_} .= "$dir/$_.pod:";
 928         } elsif (/\.pm\z/) {                                # .pm
 929             s/\.pm\z//;
 930             $pages{$_}  = "" unless defined $pages{$_};
 931             $pages{$_} .= "$dir/$_.pm:";
 932             push(@pods, "$dir/$_.pm");
 933         }
 934     }
 935     closedir(DIR);
 936
 937     # recurse on the subdirectories if necessary
 938     if ($recurse) {
 939         foreach my $subdir (@subdirs) {
 940             scan_dir("$dir/$subdir", $recurse);
 941         }
 942     }
 943 }
 944
 945 #
 946 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
 947 #  build an index.
 948 #
 949 sub scan_headings {
 950     my($sections, @data) = @_;
 951     my($tag, $which_head, $otitle, $listdepth, $index);
 952
 953     # here we need      local $ignore = 0;
 954     #  unfortunately, we can't have it, because $ignore is lexical
 955     $ignore = 0;
 956
 957     $listdepth = 0;
 958     $index = "";
 959
 960     # scan for =head directives, note their name, and build an index
 961     #  pointing to each of them.
 962     foreach my $line (@data) {
 963       if ($line =~ /^=(head)([1-6])\s+(.*)/) {
 964         ($tag, $which_head, $otitle) = ($1,$2,$3);
 965
 966         my $title = depod( $otitle );
 967         my $name = htmlify( $title );
 968         $$sections{$name} = 1;
 969         $name =~ s/\s/_/g; # htmlify keeps spaces but we don't want them here..
 970         $title = process_text( \$otitle );
 971
 972             while ($which_head != $listdepth) {
 973                 if ($which_head > $listdepth) {
 974                     $index .= "\n" . ("\t" x $listdepth) . "<ul>\n";
 975                     $listdepth++;
 976                 } elsif ($which_head < $listdepth) {
 977                     $listdepth--;
 978                     $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
 979                 }
 980             }
 981
 982             $index .= "\n" . ("\t" x $listdepth) . "<li>" .
 983                       "<a href=\"#" . $name . "\">" .
 984                       $title . "</a></li>";
 985         }
 986     }
 987
 988     # finish off the lists
 989     while ($listdepth--) {
 990         $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
 991     }
 992
 993     # get rid of bogus lists
 994     $index =~ s,\t*<ul>\s*</ul>\n,,g;
 995
 996     $ignore = 1;        # restore old value;
 997
 998     return $index;
 999 }
1000
1001 #
1002 # scan_items - scans the pod specified by $pod for =item directives.  we
1003 #  will use this information later on in resolving C<> links.
1004 #
1005 sub scan_items {
1006     my( $itemref, $pod, @poddata ) = @_;
1007     my($i, $item);
1008     local $_;
1009
1010     $pod =~ s/\.pod\z//;
1011     $pod .= ".html" if $pod;
1012
1013     foreach $i (0..$#poddata) {
1014         my $txt = depod( $poddata[$i] );
1015
1016         # figure out what kind of item it is.
1017         # Build string for referencing this item.
1018         if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
1019             next unless $1;
1020             $item = $1;
1021         } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
1022             $item = $1;
1023         } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
1024             $item = $1;
1025         } else {
1026             next;
1027         }
1028         my $fid = fragment_id( $item );
1029         $$itemref{$fid} = "$pod" if $fid;
1030     }
1031 }
1032
1033 #
1034 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
1035 #
1036 sub process_head {
1037     my($tag, $heading, $hasindex) = @_;
1038
1039     # figure out the level of the =head
1040     $tag =~ /head([1-6])/;
1041     my $level = $1;
1042
1043     if( $listlevel ){
1044         warn "$0: $podfile: unterminated list at =head in paragraph $paragraph.  ignoring.\n";
1045         while( $listlevel ){
1046             process_back();
1047         }
1048     }
1049
1050     print HTML "<p>\n";
1051     if( $level == 1 && ! $top ){
1052       print HTML "<a href=\"#__index__\"><small>$backlink</small></a>\n"
1053         if $hasindex and $backlink;
1054       print HTML "</p>\n<hr />\n"
1055     } else {
1056       print HTML "</p>\n";
1057     }
1058
1059     my $name = htmlify( depod( $heading ) );
1060     $name =~ s/\s/_/g; # htmlify keeps spaces but we don't want them here...
1061     my $convert = process_text( \$heading );
1062     print HTML "<h$level><a name=\"$name\">$convert</a></h$level>\n";
1063 }
1064
1065
1066 #
1067 # emit_item_tag - print an =item's text
1068 # Note: The global $EmittedItem is used for inhibiting self-references.
1069 #
1070 my $EmittedItem;
1071
1072 sub emit_item_tag($$$){
1073     my( $otext, $text, $compact ) = @_;
1074     my $item = fragment_id( $text );
1075
1076     $EmittedItem = $item;
1077     ### print STDERR "emit_item_tag=$item ($text)\n";
1078
1079     print HTML '<strong>';
1080     if ($items_named{$item}++) {
1081         print HTML process_text( \$otext );
1082     } else {
1083         my $name = 'item_' . $item;
1084         $name =~ s/\s/_/g; # we don't want spaces here...
1085         print HTML qq{<a name="$name">}, process_text( \$otext ), '</a>';
1086     }
1087     print HTML "</strong><br />\n";
1088     undef( $EmittedItem );
1089 }
1090
1091 sub emit_li {
1092     my( $tag ) = @_;
1093     if( $items_seen[$listlevel]++ == 0 ){
1094         push( @listend, "</$tag>" );
1095         print HTML "<$tag>\n";
1096     }
1097     my $emitted = $tag eq 'dl' ? 'dt' : 'li';
1098     print HTML "<$emitted>";
1099     return $emitted;
1100 }
1101
1102 #
1103 # process_item - convert a pod item tag and convert it to HTML format.
1104 #
1105 sub process_item {
1106     my( $otext ) = @_;
1107     my $need_dd = 0; # set to 1 if we need a <dd></dd> after an item
1108
1109     # lots of documents start a list without doing an =over.  this is
1110     # bad!  but, the proper thing to do seems to be to just assume
1111     # they did do an =over.  so warn them once and then continue.
1112     if( $listlevel == 0 ){
1113         warn "$0: $podfile: unexpected =item directive in paragraph $paragraph.  ignoring.\n";
1114         process_over();
1115     }
1116
1117     # formatting: insert a paragraph if preceding item has >1 paragraph
1118     if( $after_lpar ){
1119         print HTML "<p></p>\n";
1120         $after_lpar = 0;
1121     }
1122
1123     # remove formatting instructions from the text
1124     my $text = depod( $otext );
1125
1126     my $emitted; # the tag actually emitted, used for closing
1127
1128     # all the list variants:
1129     if( $text =~ /\A\*/ ){ # bullet
1130         $emitted = emit_li( 'ul' );
1131         if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1132             my $tag = $1;
1133             $otext =~ s/\A\*\s+//;
1134             emit_item_tag( $otext, $tag, 1 );
1135         }
1136
1137     } elsif( $text =~ /\A\d+/ ){ # numbered list
1138         $emitted = emit_li( 'ol' );
1139         if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1140             my $tag = $1;
1141             $otext =~ s/\A\d+\.?\s*//;
1142             emit_item_tag( $otext, $tag, 1 );
1143         }
1144
1145     } else {                    # definition list
1146         $emitted = emit_li( 'dl' );
1147         if ($text =~ /\A(.+)\Z/s ){ # should have text
1148             emit_item_tag( $otext, $text, 1 );
1149         }
1150         $need_dd = 1;
1151     }
1152     print HTML "</$emitted>" if $emitted;
1153     print HTML "\n";
1154     return $need_dd;
1155 }
1156
1157 #
1158 # process_over - process a pod over tag and start a corresponding HTML list.
1159 #
1160 sub process_over {
1161     # start a new list
1162     $listlevel++;
1163     push( @items_seen, 0 );
1164     $after_lpar = 0;
1165 }
1166
1167 #
1168 # process_back - process a pod back tag and convert it to HTML format.
1169 #
1170 sub process_back {
1171     if( $listlevel == 0 ){
1172         warn "$0: $podfile: unexpected =back directive in paragraph $paragraph.  ignoring.\n";
1173         return;
1174     }
1175
1176     # close off the list.  note, I check to see if $listend[$listlevel] is
1177     # defined because an =item directive may have never appeared and thus
1178     # $listend[$listlevel] may have never been initialized.
1179     $listlevel--;
1180     if( defined $listend[$listlevel] ){
1181         print HTML '<p></p>' if $after_lpar;
1182         print HTML $listend[$listlevel];
1183         print HTML "\n";
1184         pop( @listend );
1185     }
1186     $after_lpar = 0;
1187
1188     # clean up item count
1189     pop( @items_seen );
1190 }
1191
1192 #
1193 # process_cut - process a pod cut tag, thus start ignoring pod directives.
1194 #
1195 sub process_cut {
1196     $ignore = 1;
1197 }
1198
1199 #
1200 # process_pod - process a pod tag, thus stop ignoring pod directives
1201 # until we see a corresponding cut.
1202 #
1203 sub process_pod {
1204     # no need to set $ignore to 0 cause the main loop did it
1205 }
1206
1207 #
1208 # process_for - process a =for pod tag.  if it's for html, spit
1209 # it out verbatim, if illustration, center it, otherwise ignore it.
1210 #
1211 sub process_for {
1212     my($whom, $text) = @_;
1213     if ( $whom =~ /^(pod2)?html$/i) {
1214         print HTML $text;
1215     } elsif ($whom =~ /^illustration$/i) {
1216         1 while chomp $text;
1217         for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1218           $text .= $ext, last if -r "$text$ext";
1219         }
1220         print HTML qq{<p align="center"><img src="$text" alt="$text illustration" /></p>};
1221     }
1222 }
1223
1224 #
1225 # process_begin - process a =begin pod tag.  this pushes
1226 # whom we're beginning on the begin stack.  if there's a
1227 # begin stack, we only print if it us.
1228 #
1229 sub process_begin {
1230     my($whom, $text) = @_;
1231     $whom = lc($whom);
1232     push (@begin_stack, $whom);
1233     if ( $whom =~ /^(pod2)?html$/) {
1234         print HTML $text if $text;
1235     }
1236 }
1237
1238 #
1239 # process_end - process a =end pod tag.  pop the
1240 # begin stack.  die if we're mismatched.
1241 #
1242 sub process_end {
1243     my($whom, $text) = @_;
1244     $whom = lc($whom);
1245     if ($begin_stack[-1] ne $whom ) {
1246         die "Unmatched begin/end at chunk $paragraph\n"
1247     }
1248     pop( @begin_stack );
1249 }
1250
1251 #
1252 # process_pre - indented paragraph, made into <pre></pre>
1253 #
1254 sub process_pre {
1255     my( $text ) = @_;
1256     my( $rest );
1257     return if $ignore;
1258
1259     $rest = $$text;
1260
1261     # insert spaces in place of tabs
1262     $rest =~ s#.*#
1263             my $line = $&;
1264             1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1265             $line;
1266         #eg;
1267
1268     # convert some special chars to HTML escapes
1269     $rest = html_escape($rest);
1270
1271     # try and create links for all occurrences of perl.* within
1272     # the preformatted text.
1273     $rest =~ s{
1274                  (\s*)(perl\w+)
1275               }{
1276                  if ( defined $pages{$2} ){     # is a link
1277                      qq($1<a href="$htmlroot/$pages{$2}">$2</a>);
1278                  } elsif (defined $pages{dosify($2)}) { # is a link
1279                      qq($1<a href="$htmlroot/$pages{dosify($2)}">$2</a>);
1280                  } else {
1281                      "$1$2";
1282                  }
1283               }xeg;
1284      $rest =~ s{
1285                  (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1286                }{
1287                   my $url ;
1288                   if ( $htmlfileurl ne '' ){
1289                      # Here, we take advantage of the knowledge
1290                      # that $htmlfileurl ne '' implies $htmlroot eq ''.
1291                      # Since $htmlroot eq '', we need to prepend $htmldir
1292                      # on the fron of the link to get the absolute path
1293                      # of the link's target. We check for a leading '/'
1294                      # to avoid corrupting links that are #, file:, etc.
1295                      my $old_url = $3 ;
1296                      $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1297                      $url = relativize_url( "$old_url.html", $htmlfileurl );
1298                   } else {
1299                      $url = "$3.html" ;
1300                   }
1301                   "$1$url" ;
1302                }xeg;
1303
1304     # Look for embedded URLs and make them into links.  We don't
1305     # relativize them since they are best left as the author intended.
1306
1307     my $urls = '(' . join ('|', qw{
1308                 http
1309                 telnet
1310                 mailto
1311                 news
1312                 gopher
1313                 file
1314                 wais
1315                 ftp
1316             } )
1317         . ')';
1318
1319     my $ltrs = '\w';
1320     my $gunk = '/#~:.?+=&%@!\-';
1321     my $punc = '.:?\-';
1322     my $any  = "${ltrs}${gunk}${punc}";
1323
1324     $rest =~ s{
1325         \b                          # start at word boundary
1326         (                           # begin $1  {
1327           $urls     :               # need resource and a colon
1328           (?!:)                     # Ignore File::, among others.
1329           [$any] +?                 # followed by on or more
1330                                     #  of any valid character, but
1331                                     #  be conservative and take only
1332                                     #  what you need to....
1333         )                           # end   $1  }
1334         (?=                         # look-ahead non-consumptive assertion
1335                 [$punc]*            # either 0 or more puntuation
1336                 [^$any]             #   followed by a non-url char
1337             |                       # or else
1338                 $                   #   then end of the string
1339         )
1340       }{<a href="$1">$1</a>}igox;
1341
1342     # text should be as it is (verbatim)
1343     $$text = $rest;
1344 }
1345
1346
1347 #
1348 # pure text processing
1349 #
1350 # pure_text/inIS_text: differ with respect to automatic C<> recognition.
1351 # we don't want this to happen within IS
1352 #
1353 sub pure_text($){
1354     my $text = shift();
1355     process_puretext( $text, \$ptQuote, 1 );
1356 }
1357
1358 sub inIS_text($){
1359     my $text = shift();
1360     process_puretext( $text, \$ptQuote, 0 );
1361 }
1362
1363 #
1364 # process_puretext - process pure text (without pod-escapes) converting
1365 #  double-quotes and handling implicit C<> links.
1366 #
1367 sub process_puretext {
1368     my($text, $quote, $notinIS) = @_;
1369
1370     ## Guessing at func() or [$@%&]*var references in plain text is destined
1371     ## to produce some strange looking ref's. uncomment to disable:
1372     ## $notinIS = 0;
1373
1374     my(@words, $lead, $trail);
1375
1376     # convert double-quotes to single-quotes
1377     if( $$quote && $text =~ s/"/''/s ){
1378         $$quote = 0;
1379     }
1380     while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1381     $$quote = 1 if $text =~ s/"/``/s;
1382
1383     # keep track of leading and trailing white-space
1384     $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");
1385     $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
1386
1387     # split at space/non-space boundaries
1388     @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
1389
1390     # process each word individually
1391     foreach my $word (@words) {
1392         # skip space runs
1393         next if $word =~ /^\s*$/;
1394         # see if we can infer a link
1395         if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {
1396             # has parenthesis so should have been a C<> ref
1397             ## try for a pagename (perlXXX(1))?
1398             my( $func, $args ) = ( $1, $2 );
1399             if( $args =~ /^\d+$/ ){
1400                 my $url = page_sect( $word, '' );
1401                 if( defined $url ){
1402                     $word = "<a href=\"$url\">the $word manpage</a>";
1403                     next;
1404                 }
1405             }
1406             ## try function name for a link, append tt'ed argument list
1407             $word = emit_C( $func, '', "($args)");
1408
1409 #### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1410 ##      } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1411 ##          # perl variables, should be a C<> ref
1412 ##          $word = emit_C( $word );
1413
1414         } elsif ($word =~ m,^\w+://\w,) {
1415             # looks like a URL
1416             # Don't relativize it: leave it as the author intended
1417             $word = qq(<a href="$word">$word</a>);
1418         } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1419             # looks like an e-mail address
1420             my ($w1, $w2, $w3) = ("", $word, "");
1421             ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1422             ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1423             $word = qq($w1<a href="mailto:$w2">$w2</a>$w3);
1424         } else {
1425             $word = html_escape($word) if $word =~ /["&<>]/;
1426         }
1427     }
1428
1429     # put everything back together
1430     return $lead . join( '', @words ) . $trail;
1431 }
1432
1433
1434 #
1435 # process_text - handles plaintext that appears in the input pod file.
1436 # there may be pod commands embedded within the text so those must be
1437 # converted to html commands.
1438 #
1439
1440 sub process_text1($$;$$);
1441 sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
1442 sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
1443
1444 sub process_text {
1445     return if $ignore;
1446     my( $tref ) = @_;
1447     my $res = process_text1( 0, $tref );
1448     $$tref = $res;
1449 }
1450
1451 sub process_text1($$;$$){
1452     my( $lev, $rstr, $func, $closing ) = @_;
1453     my $res = '';
1454
1455     unless (defined $func) {
1456         $func = '';
1457         $lev++;
1458     }
1459
1460     if( $func eq 'B' ){
1461         # B<text> - boldface
1462         $res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>';
1463
1464     } elsif( $func eq 'C' ){
1465         # C<code> - can be a ref or <code></code>
1466         # need to extract text
1467         my $par = go_ahead( $rstr, 'C', $closing );
1468
1469         ## clean-up of the link target
1470         my $text = depod( $par );
1471
1472         ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1473         ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1474
1475         $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1476
1477     } elsif( $func eq 'E' ){
1478         # E<x> - convert to character
1479         $$rstr =~ s/^([^>]*)>//;
1480         my $escape = $1;
1481         $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1482         $res = "&$escape;";
1483
1484     } elsif( $func eq 'F' ){
1485         # F<filename> - italizice
1486         $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1487
1488     } elsif( $func eq 'I' ){
1489         # I<text> - italizice
1490         $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1491
1492     } elsif( $func eq 'L' ){
1493         # L<link> - link
1494         ## L<text|cross-ref> => produce text, use cross-ref for linking
1495         ## L<cross-ref> => make text from cross-ref
1496         ## need to extract text
1497         my $par = go_ahead( $rstr, 'L', $closing );
1498
1499         # some L<>'s that shouldn't be:
1500         # a) full-blown URL's are emitted as-is
1501         if( $par =~ m{^\w+://}s ){
1502             return make_URL_href( $par );
1503         }
1504         # b) C<...> is stripped and treated as C<>
1505         if( $par =~ /^C<(.*)>$/ ){
1506             my $text = depod( $1 );
1507             return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1508         }
1509
1510         # analyze the contents
1511         $par =~ s/\n/ /g;   # undo word-wrapped tags
1512         my $opar = $par;
1513         my $linktext;
1514         if( $par =~ s{^([^|]+)\|}{} ){
1515             $linktext = $1;
1516         }
1517
1518         # make sure sections start with a /
1519         $par =~ s{^"}{/"};
1520
1521         my( $page, $section, $ident );
1522
1523         # check for link patterns
1524         if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){     # name/ident
1525             # we've got a name/ident (no quotes)
1526             ( $page, $ident ) = ( $1, $2 );
1527             ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1528
1529         } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1530             # even though this should be a "section", we go for ident first
1531             ( $page, $ident ) = ( $1, $2 );
1532             ### print STDERR "--> L<$par> to page $page, section $section\n";
1533
1534         } elsif( $par =~ /\s/ ){  # this must be a section with missing quotes
1535             ( $page, $section ) = ( '', $par );
1536             ### print STDERR "--> L<$par> to void page, section $section\n";
1537
1538         } else {
1539             ( $page, $section ) = ( $par, '' );
1540             ### print STDERR "--> L<$par> to page $par, void section\n";
1541         }
1542
1543         # now, either $section or $ident is defined. the convoluted logic
1544         # below tries to resolve L<> according to what the user specified.
1545         # failing this, we try to find the next best thing...
1546         my( $url, $ltext, $fid );
1547
1548         RESOLVE: {
1549             if( defined $ident ){
1550                 ## try to resolve $ident as an item
1551                 ( $url, $fid ) = coderef( $page, $ident );
1552                 if( $url ){
1553                     if( ! defined( $linktext ) ){
1554                         $linktext = $ident;
1555                         $linktext .= " in " if $ident && $page;
1556                         $linktext .= "the $page manpage" if $page;
1557                     }
1558                     ###  print STDERR "got coderef url=$url\n";
1559                     last RESOLVE;
1560                 }
1561                 ## no luck: go for a section (auto-quoting!)
1562                 $section = $ident;
1563             }
1564             ## now go for a section
1565             my $htmlsection = htmlify( $section );
1566             $url = page_sect( $page, $htmlsection );
1567             if( $url ){
1568                 if( ! defined( $linktext ) ){
1569                     $linktext = $section;
1570                     $linktext .= " in " if $section && $page;
1571                     $linktext .= "the $page manpage" if $page;
1572                 }
1573                 ### print STDERR "got page/section url=$url\n";
1574                 last RESOLVE;
1575             }
1576             ## no luck: go for an ident
1577             if( $section ){
1578                 $ident = $section;
1579             } else {
1580                 $ident = $page;
1581                 $page  = undef();
1582             }
1583             ( $url, $fid ) = coderef( $page, $ident );
1584             if( $url ){
1585                 if( ! defined( $linktext ) ){
1586                     $linktext = $ident;
1587                     $linktext .= " in " if $ident && $page;
1588                     $linktext .= "the $page manpage" if $page;
1589                 }
1590                 ### print STDERR "got section=>coderef url=$url\n";
1591                 last RESOLVE;
1592             }
1593
1594             # warning; show some text.
1595             $linktext = $opar unless defined $linktext;
1596             warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1597         }
1598
1599         # now we have a URL or just plain code
1600         $$rstr = $linktext . '>' . $$rstr;
1601         if( defined( $url ) ){
1602             $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>';
1603         } else {
1604             $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1605         }
1606
1607     } elsif( $func eq 'S' ){
1608         # S<text> - non-breaking spaces
1609         $res = process_text1( $lev, $rstr );
1610         $res =~ s/ /&nbsp;/g;
1611
1612     } elsif( $func eq 'X' ){
1613         # X<> - ignore
1614         $$rstr =~ s/^[^>]*>//;
1615
1616     } elsif( $func eq 'Z' ){
1617         # Z<> - empty
1618         warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1619             unless $$rstr =~ s/^>//;
1620
1621     } else {
1622         my $term = pattern $closing;
1623         while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
1624             # all others: either recurse into new function or
1625             # terminate at closing angle bracket(s)
1626             my $pt = $1;
1627             $pt .= $2 if !$3 &&  $lev == 1;
1628             $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1629             return $res if !$3 && $lev > 1;
1630             if( $3 ){
1631                 $res .= process_text1( $lev, $rstr, $3, closing $4 );
1632             }
1633         }
1634         if( $lev == 1 ){
1635             $res .= pure_text( $$rstr );
1636         } else {
1637             warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1638         }
1639     }
1640     return $res;
1641 }
1642
1643 #
1644 # go_ahead: extract text of an IS (can be nested)
1645 #
1646 sub go_ahead($$$){
1647     my( $rstr, $func, $closing ) = @_;
1648     my $res = '';
1649     my @closing = ($closing);
1650     while( $$rstr =~
1651       s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
1652         $res .= $1;
1653         unless( $3 ){
1654             shift @closing;
1655             return $res unless @closing;
1656         } else {
1657             unshift @closing, closing $4;
1658         }
1659         $res .= $2;
1660     }
1661     warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1662     return $res;
1663 }
1664
1665 #
1666 # emit_C - output result of C<text>
1667 #    $text is the depod-ed text
1668 #
1669 sub emit_C($;$$){
1670     my( $text, $nocode, $args ) = @_;
1671     $args = '' unless defined $args;
1672     my $res;
1673     my( $url, $fid ) = coderef( undef(), $text );
1674
1675     # need HTML-safe text
1676     my $linktext = html_escape( "$text$args" );
1677
1678     if( defined( $url ) &&
1679         (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1680         $res = "<a href=\"$url\"><code>$linktext</code></a>";
1681     } elsif( 0 && $nocode ){
1682         $res = $linktext;
1683     } else {
1684         $res = "<code>$linktext</code>";
1685     }
1686     return $res;
1687 }
1688
1689 #
1690 # html_escape: make text safe for HTML
1691 #
1692 sub html_escape {
1693     my $rest = $_[0];
1694     $rest   =~ s/&/&amp;/g;
1695     $rest   =~ s/</&lt;/g;
1696     $rest   =~ s/>/&gt;/g;
1697     $rest   =~ s/"/&quot;/g;
1698     $rest   =~ s/'/&apos;/g;
1699     return $rest;
1700 }
1701
1702
1703 #
1704 # dosify - convert filenames to 8.3
1705 #
1706 sub dosify {
1707     my($str) = @_;
1708     return lc($str) if $^O eq 'VMS';     # VMS just needs casing
1709     if ($Is83) {
1710         $str = lc $str;
1711         $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1712         $str =~ s/(\w+)/substr ($1,0,8)/ge;
1713     }
1714     return $str;
1715 }
1716
1717 #
1718 # page_sect - make a URL from the text of a L<>
1719 #
1720 sub page_sect($$) {
1721     my( $page, $section ) = @_;
1722     my( $linktext, $page83, $link);     # work strings
1723
1724     # check if we know that this is a section in this page
1725     if (!defined $pages{$page} && defined $sections{$page}) {
1726         $section = $page;
1727         $page = "";
1728         ### print STDERR "reset page='', section=$section\n";
1729     }
1730
1731     $page83=dosify($page);
1732     $page=$page83 if (defined $pages{$page83});
1733     if ($page eq "") {
1734         $link = "#" . htmlify( $section );
1735     } elsif ( $page =~ /::/ ) {
1736         $page =~ s,::,/,g;
1737         # Search page cache for an entry keyed under the html page name,
1738         # then look to see what directory that page might be in.  NOTE:
1739         # this will only find one page. A better solution might be to produce
1740         # an intermediate page that is an index to all such pages.
1741         my $page_name = $page ;
1742         $page_name =~ s,^.*/,,s ;
1743         if ( defined( $pages{ $page_name } ) &&
1744              $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1745            ) {
1746             $page = $1 ;
1747         }
1748         else {
1749             # NOTE: This branch assumes that all A::B pages are located in
1750             # $htmlroot/A/B.html . This is often incorrect, since they are
1751             # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1752             # analyze the contents of %pages and figure out where any
1753             # cousins of A::B are, then assume that.  So, if A::B isn't found,
1754             # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1755             # lib/A/B.pm. This is also limited, but it's an improvement.
1756             # Maybe a hints file so that the links point to the correct places
1757             # nonetheless?
1758
1759         }
1760         $link = "$htmlroot/$page.html";
1761         $link .= "#" . htmlify( $section ) if ($section);
1762     } elsif (!defined $pages{$page}) {
1763         $link = "";
1764     } else {
1765         $section = htmlify( $section ) if $section ne "";
1766         ### print STDERR "...section=$section\n";
1767
1768         # if there is a directory by the name of the page, then assume that an
1769         # appropriate section will exist in the subdirectory
1770 #       if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1771         if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
1772             $link = "$htmlroot/$1/$section.html";
1773             ### print STDERR "...link=$link\n";
1774
1775         # since there is no directory by the name of the page, the section will
1776         # have to exist within a .html of the same name.  thus, make sure there
1777         # is a .pod or .pm that might become that .html
1778         } else {
1779             $section = "#$section" if $section;
1780             ### print STDERR "...section=$section\n";
1781
1782             # check if there is a .pod with the page name
1783             if ($pages{$page} =~ /([^:]*)\.pod:/) {
1784                 $link = "$htmlroot/$1.html$section";
1785             } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1786                 $link = "$htmlroot/$1.html$section";
1787             } else {
1788                 $link = "";
1789             }
1790         }
1791     }
1792
1793     if ($link) {
1794         # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1795         # implies $htmlroot eq ''. This means that the link in question
1796         # needs a prefix of $htmldir if it begins with '/'. The test for
1797         # the initial '/' is done to avoid '#'-only links, and to allow
1798         # for other kinds of links, like file:, ftp:, etc.
1799         my $url ;
1800         if (  $htmlfileurl ne '' ) {
1801             $link = "$htmldir$link" if $link =~ m{^/}s;
1802             $url = relativize_url( $link, $htmlfileurl );
1803 # print( "  b: [$link,$htmlfileurl,$url]\n" );
1804         }
1805         else {
1806             $url = $link ;
1807         }
1808         return $url;
1809
1810     } else {
1811         return undef();
1812     }
1813 }
1814
1815 #
1816 # relativize_url - convert an absolute URL to one relative to a base URL.
1817 # Assumes both end in a filename.
1818 #
1819 sub relativize_url {
1820     my ($dest,$source) = @_ ;
1821
1822     my ($dest_volume,$dest_directory,$dest_file) =
1823         File::Spec::Unix->splitpath( $dest ) ;
1824     $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1825
1826     my ($source_volume,$source_directory,$source_file) =
1827         File::Spec::Unix->splitpath( $source ) ;
1828     $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1829
1830     my $rel_path = '' ;
1831     if ( $dest ne '' ) {
1832        $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1833     }
1834
1835     if ( $rel_path ne ''                &&
1836          substr( $rel_path, -1 ) ne '/' &&
1837          substr( $dest_file, 0, 1 ) ne '#'
1838         ) {
1839         $rel_path .= "/$dest_file" ;
1840     }
1841     else {
1842         $rel_path .= "$dest_file" ;
1843     }
1844
1845     return $rel_path ;
1846 }
1847
1848
1849 #
1850 # coderef - make URL from the text of a C<>
1851 #
1852 sub coderef($$){
1853     my( $page, $item ) = @_;
1854     my( $url );
1855
1856     my $fid = fragment_id( $item );
1857     if( defined( $page ) ){
1858         # we have been given a $page...
1859         $page =~ s{::}{/}g;
1860
1861         # Do we take it? Item could be a section!
1862         my $base = $items{$fid} || "";
1863         $base =~ s{[^/]*/}{};
1864         if( $base ne "$page.html" ){
1865             ###   print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1866             $page = undef();
1867         }
1868
1869     } else {
1870         # no page - local items precede cached items
1871         if( defined( $fid ) ){
1872             if(  exists $local_items{$fid} ){
1873                 $page = $local_items{$fid};
1874             } else {
1875                 $page = $items{$fid};
1876             }
1877         }
1878     }
1879
1880     # if there was a pod file that we found earlier with an appropriate
1881     # =item directive, then create a link to that page.
1882     if( defined $page ){
1883         if( $page ){
1884             if( exists $pages{$page} and $pages{$page} =~ /([^:.]*)\.[^:]*:/){
1885                 $page = $1 . '.html';
1886             }
1887             my $link = "$htmlroot/$page#item_$fid";
1888
1889             # Here, we take advantage of the knowledge that $htmlfileurl
1890             # ne '' implies $htmlroot eq ''.
1891             if (  $htmlfileurl ne '' ) {
1892                 $link = "$htmldir$link" ;
1893                 $url = relativize_url( $link, $htmlfileurl ) ;
1894             } else {
1895                 $url = $link ;
1896             }
1897         } else {
1898             $url = "#item_" . $fid;
1899         }
1900
1901         confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1902     }
1903     return( $url, $fid );
1904 }
1905
1906
1907
1908 #
1909 # Adapted from Nick Ing-Simmons' PodToHtml package.
1910 sub relative_url {
1911     my $source_file = shift ;
1912     my $destination_file = shift;
1913
1914     my $source = URI::file->new_abs($source_file);
1915     my $uo = URI::file->new($destination_file,$source)->abs;
1916     return $uo->rel->as_string;
1917 }
1918
1919
1920 #
1921 # finish_list - finish off any pending HTML lists.  this should be called
1922 # after the entire pod file has been read and converted.
1923 #
1924 sub finish_list {
1925     while ($listlevel > 0) {
1926         print HTML "</dl>\n";
1927         $listlevel--;
1928     }
1929 }
1930
1931 #
1932 # htmlify - converts a pod section specification to a suitable section
1933 # specification for HTML. Note that we keep spaces and special characters
1934 # except ", ? (Netscape problem) and the hyphen (writer's problem...).
1935 #
1936 sub htmlify {
1937     my( $heading) = @_;
1938     $heading =~ s/(\s+)/ /g;
1939     $heading =~ s/\s+\Z//;
1940     $heading =~ s/\A\s+//;
1941     # The hyphen is a disgrace to the English language.
1942     $heading =~ s/[-"?]//g;
1943     $heading = lc( $heading );
1944     return $heading;
1945 }
1946
1947 #
1948 # depod - convert text by eliminating all interior sequences
1949 # Note: can be called with copy or modify semantics
1950 #
1951 my %E2c;
1952 $E2c{lt}     = '<';
1953 $E2c{gt}     = '>';
1954 $E2c{sol}    = '/';
1955 $E2c{verbar} = '|';
1956 $E2c{amp}    = '&'; # in Tk's pods
1957
1958 sub depod1($;$$);
1959
1960 sub depod($){
1961     my $string;
1962     if( ref( $_[0] ) ){
1963         $string =  ${$_[0]};
1964         ${$_[0]} = depod1( \$string );
1965     } else {
1966         $string =  $_[0];
1967         depod1( \$string );
1968     }
1969 }
1970
1971 sub depod1($;$$){
1972   my( $rstr, $func, $closing ) = @_;
1973   my $res = '';
1974   return $res unless defined $$rstr;
1975   if( ! defined( $func ) ){
1976       # skip to next begin of an interior sequence
1977       while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?// ){
1978          # recurse into its text
1979           $res .= $1 . depod1( $rstr, $2, closing $3);
1980       }
1981       $res .= $$rstr;
1982   } elsif( $func eq 'E' ){
1983       # E<x> - convert to character
1984       $$rstr =~ s/^([^>]*)>//;
1985       $res .= $E2c{$1} || "";
1986   } elsif( $func eq 'X' ){
1987       # X<> - ignore
1988       $$rstr =~ s/^[^>]*>//;
1989   } elsif( $func eq 'Z' ){
1990       # Z<> - empty
1991       $$rstr =~ s/^>//;
1992   } else {
1993       # all others: either recurse into new function or
1994       # terminate at closing angle bracket
1995       my $term = pattern $closing;
1996       while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)// ){
1997           $res .= $1;
1998           last unless $3;
1999           $res .= depod1( $rstr, $3, closing $4 );
2000       }
2001       ## If we're here and $2 ne '>': undelimited interior sequence.
2002       ## Ignored, as this is called without proper indication of where we are.
2003       ## Rely on process_text to produce diagnostics.
2004   }
2005   return $res;
2006 }
2007
2008 #
2009 # fragment_id - construct a fragment identifier from:
2010 #   a) =item text
2011 #   b) contents of C<...>
2012 #
2013 my @hc;
2014 sub fragment_id {
2015     my $text = shift();
2016     $text =~ s/\s+\Z//s;
2017     if( $text ){
2018         # a method or function?
2019         return $1 if $text =~ /(\w+)\s*\(/;
2020         return $1 if $text =~ /->\s*(\w+)\s*\(?/;
2021
2022         # a variable name?
2023         return $1 if $text =~ /^([$@%*]\S+)/;
2024
2025         # some pattern matching operator?
2026         return $1 if $text =~ m|^(\w+/).*/\w*$|;
2027
2028         # fancy stuff... like "do { }"
2029         return $1 if $text =~ m|^(\w+)\s*{.*}$|;
2030
2031         # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
2032         # and some funnies with ... Module ...
2033         return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
2034         return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2035
2036         # text? normalize!
2037         $text =~ s/\s+/_/sg;
2038         $text =~ s{(\W)}{
2039          defined( $hc[ord($1)] ) ? $hc[ord($1)]
2040                  : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
2041         $text = substr( $text, 0, 50 );
2042     } else {
2043         return undef();
2044     }
2045 }
2046
2047 #
2048 # make_URL_href - generate HTML href from URL
2049 # Special treatment for CGI queries.
2050 #
2051 sub make_URL_href($){
2052     my( $url ) = @_;
2053     if( $url !~
2054         s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){
2055         $url = "<a href=\"$url\">$url</a>";
2056     }
2057     return $url;
2058 }
2059
2060 1;