lib/Pod/Html.pm

   1 package Pod::Html;
   2 use strict;
   3 require Exporter;
   4
   5 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
   6 $VERSION = 1.07;
   7 @ISA = qw(Exporter);
   8 @EXPORT = qw(pod2html htmlify);
   9 @EXPORT_OK = qw(anchorify);
  10
  11 use Carp;
  12 use Config;
  13 use Cwd;
  14 use File::Spec;
  15 use File::Spec::Unix;
  16 use Getopt::Long;
  17
  18 use locale;     # make \w work right in non-ASCII lands
  19
  20 =head1 NAME
  21
  22 Pod::Html - module to convert pod files to HTML
  23
  24 =head1 SYNOPSIS
  25
  26     use Pod::Html;
  27     pod2html([options]);
  28
  29 =head1 DESCRIPTION
  30
  31 Converts files from pod format (see L<perlpod>) to HTML format.  It
  32 can automatically generate indexes and cross-references, and it keeps
  33 a cache of things it knows how to cross-reference.
  34
  35 =head1 FUNCTIONS
  36
  37 =head2 pod2html
  38
  39     pod2html("pod2html",
  40              "--podpath=lib:ext:pod:vms",
  41              "--podroot=/usr/src/perl",
  42              "--htmlroot=/perl/nmanual",
  43              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
  44              "--recurse",
  45              "--infile=foo.pod",
  46              "--outfile=/perl/nmanual/foo.html");
  47
  48 pod2html takes the following arguments:
  49
  50 =over 4
  51
  52 =item backlink
  53
  54     --backlink="Back to Top"
  55
  56 Adds "Back to Top" links in front of every C<head1> heading (except for
  57 the first).  By default, no backlinks are generated.
  58
  59 =item cachedir
  60
  61     --cachedir=name
  62
  63 Creates the item and directory caches in the given directory.
  64
  65 =item css
  66
  67     --css=stylesheet
  68
  69 Specify the URL of a cascading style sheet.  Also disables all HTML/CSS
  70 C<style> attributes that are output by default (to avoid conflicts).
  71
  72 =item flush
  73
  74     --flush
  75
  76 Flushes the item and directory caches.
  77
  78 =item header
  79
  80     --header
  81     --noheader
  82
  83 Creates header and footer blocks containing the text of the C<NAME>
  84 section.  By default, no headers are generated.
  85
  86 =item help
  87
  88     --help
  89
  90 Displays the usage message.
  91
  92 =item hiddendirs
  93
  94     --hiddendirs
  95     --nohiddendirs
  96
  97 Include hidden directories in the search for POD's in podpath if recurse
  98 is set.
  99 The default is not to traverse any directory whose name begins with C<.>.
 100 See L</"podpath"> and L</"recurse">.
 101
 102 [This option is for backward compatibility only.
 103 It's hard to imagine that one would usefully create a module with a
 104 name component beginning with C<.>.]
 105
 106 =item htmldir
 107
 108     --htmldir=name
 109
 110 Sets the directory in which the resulting HTML file is placed.  This
 111 is used to generate relative links to other files. Not passing this
 112 causes all links to be absolute, since this is the value that tells
 113 Pod::Html the root of the documentation tree.
 114
 115 =item htmlroot
 116
 117     --htmlroot=name
 118
 119 Sets the base URL for the HTML files.  When cross-references are made,
 120 the HTML root is prepended to the URL.
 121
 122 =item index
 123
 124     --index
 125     --noindex
 126
 127 Generate an index at the top of the HTML file.  This is the default
 128 behaviour.
 129
 130 =item infile
 131
 132     --infile=name
 133
 134 Specify the pod file to convert.  Input is taken from STDIN if no
 135 infile is specified.
 136
 137 =item libpods
 138
 139     --libpods=name:...:name
 140
 141 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
 142
 143 =item netscape
 144
 145     --netscape
 146     --nonetscape
 147
 148 B<Deprecated>, has no effect. For backwards compatibility only.
 149
 150 =item outfile
 151
 152     --outfile=name
 153
 154 Specify the HTML file to create.  Output goes to STDOUT if no outfile
 155 is specified.
 156
 157 =item podpath
 158
 159     --podpath=name:...:name
 160
 161 Specify which subdirectories of the podroot contain pod files whose
 162 HTML converted forms can be linked to in cross references.
 163
 164 =item podroot
 165
 166     --podroot=name
 167
 168 Specify the base directory for finding library pods.
 169
 170 =item quiet
 171
 172     --quiet
 173     --noquiet
 174
 175 Don't display I<mostly harmless> warning messages.  These messages
 176 will be displayed by default.  But this is not the same as C<verbose>
 177 mode.
 178
 179 =item recurse
 180
 181     --recurse
 182     --norecurse
 183
 184 Recurse into subdirectories specified in podpath (default behaviour).
 185
 186 =item title
 187
 188     --title=title
 189
 190 Specify the title of the resulting HTML file.
 191
 192 =item verbose
 193
 194     --verbose
 195     --noverbose
 196
 197 Display progress messages.  By default, they won't be displayed.
 198
 199 =back
 200
 201 =head2 htmlify
 202
 203     htmlify($heading);
 204
 205 Converts a pod section specification to a suitable section specification
 206 for HTML. Note that we keep spaces and special characters except
 207 C<", ?> (Netscape problem) and the hyphen (writer's problem...).
 208
 209 =head2 anchorify
 210
 211     anchorify(@heading);
 212
 213 Similar to C<htmlify()>, but turns non-alphanumerics into underscores.  Note
 214 that C<anchorify()> is not exported by default.
 215
 216 =head1 ENVIRONMENT
 217
 218 Uses C<$Config{pod2html}> to setup default options.
 219
 220 =head1 AUTHOR
 221
 222 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 223
 224 =head1 SEE ALSO
 225
 226 L<perlpod>
 227
 228 =head1 COPYRIGHT
 229
 230 This program is distributed under the Artistic License.
 231
 232 =cut
 233
 234
 235 my($Cachedir);
 236 my($Dircache, $Itemcache);
 237 my @Begin_Stack;
 238 my @Libpods;
 239 my($Htmlroot, $Htmldir, $Htmlfile, $Htmlfileurl);
 240 my($Podfile, @Podpath, $Podroot);
 241 my $Css;
 242
 243 my $Recurse;
 244 my $Quiet;
 245 my $HiddenDirs;
 246 my $Verbose;
 247 my $Doindex;
 248
 249 my $Backlink;
 250 my($Listlevel, @Listend);
 251 my $After_Lpar;
 252 use vars qw($Ignore);  # need to localize it later.
 253
 254 my(%Items_Named, @Items_Seen);
 255 my($Title, $Header);
 256
 257 my $Top;
 258 my $Paragraph;
 259
 260 my %Sections;
 261
 262 # Caches
 263 my %Pages = ();                 # associative array used to find the location
 264                                 #   of pages referenced by L<> links.
 265 my %Items = ();                 # associative array used to find the location
 266                                 #   of =item directives referenced by C<> links
 267
 268 my %Local_Items;
 269 my $Is83;
 270 my $PTQuote;
 271
 272 my $Curdir = File::Spec->curdir;
 273
 274 init_globals();
 275
 276 sub init_globals {
 277     $Cachedir = ".";            # The directory to which item and directory
 278                                 # caches will be written.
 279
 280     $Dircache = "pod2htmd.tmp";
 281     $Itemcache = "pod2htmi.tmp";
 282
 283     @Begin_Stack = ();          # begin/end stack
 284
 285     @Libpods = ();              # files to search for links from C<> directives
 286     $Htmlroot = "/";            # http-server base directory from which all
 287                                 #   relative paths in $podpath stem.
 288     $Htmldir = "";              # The directory to which the html pages
 289                                 # will (eventually) be written.
 290     $Htmlfile = "";             # write to stdout by default
 291     $Htmlfileurl = "" ;         # The url that other files would use to
 292                                 # refer to this file.  This is only used
 293                                 # to make relative urls that point to
 294                                 # other files.
 295
 296     $Podfile = "";              # read from stdin by default
 297     @Podpath = ();              # list of directories containing library pods.
 298     $Podroot = $Curdir;         # filesystem base directory from which all
 299                                 #   relative paths in $podpath stem.
 300     $Css = '';                  # Cascading style sheet
 301     $Recurse = 1;               # recurse on subdirectories in $podpath.
 302     $Quiet = 0;                 # not quiet by default
 303     $Verbose = 0;               # not verbose by default
 304     $Doindex = 1;               # non-zero if we should generate an index
 305     $Backlink = '';             # text for "back to top" links
 306     $Listlevel = 0;             # current list depth
 307     @Listend = ();              # the text to use to end the list.
 308     $After_Lpar = 0;            # set to true after a par in an =item
 309     $Ignore = 1;                # whether or not to format text.  we don't
 310                                 #   format text until we hit our first pod
 311                                 #   directive.
 312
 313     @Items_Seen = ();           # for multiples of the same item in perlfunc
 314     %Items_Named = ();
 315     $Header = 0;                # produce block header/footer
 316     $Title = '';                # title to give the pod(s)
 317     $Top = 1;                   # true if we are at the top of the doc.  used
 318                                 #   to prevent the first <hr /> directive.
 319     $Paragraph = '';            # which paragraph we're processing (used
 320                                 #   for error messages)
 321     $PTQuote = 0;               # status of double-quote conversion
 322     %Sections = ();             # sections within this page
 323
 324     %Local_Items = ();
 325     $Is83 = $^O eq 'dos';       # Is it an 8.3 filesystem?
 326 }
 327
 328 #
 329 # clean_data: global clean-up of pod data
 330 #
 331 sub clean_data($){
 332     my( $dataref ) = @_;
 333     for my $i ( 0..$#{$dataref} ) {
 334         ${$dataref}[$i] =~ s/\s+\Z//;
 335
 336         # have a look for all-space lines
 337       if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
 338             my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
 339             splice( @$dataref, $i, 1, @chunks );
 340         }
 341     }
 342 }
 343
 344
 345 sub pod2html {
 346     local(@ARGV) = @_;
 347     local($/);
 348     local $_;
 349
 350     init_globals();
 351
 352     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 353
 354     # cache of %Pages and %Items from last time we ran pod2html
 355
 356     #undef $opt_help if defined $opt_help;
 357
 358     # parse the command-line parameters
 359     parse_command_line();
 360
 361     # escape the backlink argument (same goes for title but is done later...)
 362     $Backlink = html_escape($Backlink) if defined $Backlink;
 363
 364     # set some variables to their default values if necessary
 365     local *POD;
 366     unless (@ARGV && $ARGV[0]) {
 367         $Podfile  = "-" unless $Podfile;        # stdin
 368         open(POD, "<$Podfile")
 369                 || die "$0: cannot open $Podfile file for input: $!\n";
 370     } else {
 371         $Podfile = $ARGV[0];  # XXX: might be more filenames
 372         *POD = *ARGV;
 373     }
 374     $Htmlfile = "-" unless $Htmlfile;   # stdout
 375     $Htmlroot = "" if $Htmlroot eq "/"; # so we don't get a //
 376     $Htmldir =~ s#/\z## ;               # so we don't get a //
 377     if (  $Htmlroot eq ''
 378        && defined( $Htmldir )
 379        && $Htmldir ne ''
 380        && substr( $Htmlfile, 0, length( $Htmldir ) ) eq $Htmldir
 381        )
 382     {
 383         # Set the 'base' url for this file, so that we can use it
 384         # as the location from which to calculate relative links
 385         # to other files. If this is '', then absolute links will
 386         # be used throughout.
 387         $Htmlfileurl= "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1);
 388     }
 389
 390     # read the pod a paragraph at a time
 391     warn "Scanning for sections in input file(s)\n" if $Verbose;
 392     $/ = "";
 393     my @poddata  = <POD>;
 394     close(POD);
 395
 396     # be eol agnostic
 397     for (@poddata) {
 398         if (/\r/) {
 399             if (/\r\n/) {
 400                 @poddata = map { s/\r\n/\n/g;
 401                                  /\n\n/ ?
 402                                      map { "$_\n\n" } split /\n\n/ :
 403                                      $_ } @poddata;
 404             } else {
 405                 @poddata = map { s/\r/\n/g;
 406                                  /\n\n/ ?
 407                                      map { "$_\n\n" } split /\n\n/ :
 408                                      $_ } @poddata;
 409             }
 410             last;
 411         }
 412     }
 413
 414     clean_data( \@poddata );
 415
 416     # scan the pod for =head[1-6] directives and build an index
 417     my $index = scan_headings(\%Sections, @poddata);
 418
 419     unless($index) {
 420         warn "No headings in $Podfile\n" if $Verbose;
 421     }
 422
 423     # open the output file
 424     open(HTML, ">$Htmlfile")
 425             || die "$0: cannot open $Htmlfile file for output: $!\n";
 426
 427     # put a title in the HTML file if one wasn't specified
 428     if ($Title eq '') {
 429         TITLE_SEARCH: {
 430             for (my $i = 0; $i < @poddata; $i++) {
 431                 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 432                     for my $para ( @poddata[$i, $i+1] ) {
 433                         last TITLE_SEARCH
 434                             if ($Title) = $para =~ /(\S+\s+-+.*\S)/s;
 435                     }
 436                 }
 437
 438             }
 439         }
 440     }
 441     if (!$Title and $Podfile =~ /\.pod\z/) {
 442         # probably a split pod so take first =head[12] as title
 443         for (my $i = 0; $i < @poddata; $i++) {
 444             last if ($Title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 445         }
 446         warn "adopted '$Title' as title for $Podfile\n"
 447             if $Verbose and $Title;
 448     }
 449     if ($Title) {
 450         $Title =~ s/\s*\(.*\)//;
 451     } else {
 452         warn "$0: no title for $Podfile.\n" unless $Quiet;
 453         $Podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
 454         $Title = ($Podfile eq "-" ? 'No Title' : $1);
 455         warn "using $Title" if $Verbose;
 456     }
 457     $Title = html_escape($Title);
 458
 459     my $csslink = '';
 460     my $bodystyle = ' style="background-color: white"';
 461     my $tdstyle = ' style="background-color: #cccccc"';
 462
 463     if ($Css) {
 464       $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />);
 465       $csslink =~ s,\\,/,g;
 466       $csslink =~ s,(/.):,$1|,;
 467       $bodystyle = '';
 468       $tdstyle = '';
 469     }
 470
 471       my $block = $Header ? <<END_OF_BLOCK : '';
 472 <table border="0" width="100%" cellspacing="0" cellpadding="3">
 473 <tr><td class="block"$tdstyle valign="middle">
 474 <big><strong><span class="block">&nbsp;$Title</span></strong></big>
 475 </td></tr>
 476 </table>
 477 END_OF_BLOCK
 478
 479     print HTML <<END_OF_HEAD;
 480 <?xml version="1.0" ?>
 481 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 482 <html xmlns="http://www.w3.org/1999/xhtml">
 483 <head>
 484 <title>$Title</title>$csslink
 485 <meta http-equiv="content-type" content="text/html; charset=utf-8" />
 486 <link rev="made" href="mailto:$Config{perladmin}" />
 487 </head>
 488
 489 <body$bodystyle>
 490 $block
 491 END_OF_HEAD
 492
 493     # load/reload/validate/cache %Pages and %Items
 494     get_cache($Dircache, $Itemcache, \@Podpath, $Podroot, $Recurse);
 495
 496     # scan the pod for =item directives
 497     scan_items( \%Local_Items, "", @poddata);
 498
 499     # put an index at the top of the file.  note, if $Doindex is 0 we
 500     # still generate an index, but surround it with an html comment.
 501     # that way some other program can extract it if desired.
 502     $index =~ s/--+/-/g;
 503     print HTML "<p><a name=\"__index__\"></a></p>\n";
 504     print HTML "<!-- INDEX BEGIN -->\n";
 505     print HTML "<!--\n" unless $Doindex;
 506     print HTML $index;
 507     print HTML "-->\n" unless $Doindex;
 508     print HTML "<!-- INDEX END -->\n\n";
 509     print HTML "<hr />\n" if $Doindex and $index;
 510
 511     # now convert this file
 512     my $after_item;             # set to true after an =item
 513     my $need_dd = 0;
 514     warn "Converting input file $Podfile\n" if $Verbose;
 515     foreach my $i (0..$#poddata){
 516         $PTQuote = 0; # status of quote conversion
 517
 518         $_ = $poddata[$i];
 519         $Paragraph = $i+1;
 520         if (/^(=.*)/s) {        # is it a pod directive?
 521             $Ignore = 0;
 522             $after_item = 0;
 523             $need_dd = 0;
 524             $_ = $1;
 525             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 526                 process_begin($1, $2);
 527             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 528                 process_end($1, $2);
 529             } elsif (/^=cut/) {                 # =cut
 530                 process_cut();
 531             } elsif (/^=pod/) {                 # =pod
 532                 process_pod();
 533             } else {
 534                 next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
 535
 536                 if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
 537                     process_head( $1, $2, $Doindex && $index );
 538                 } elsif (/^=item\s*(.*\S)?/sm) {        # =item text
 539                     $need_dd = process_item( $1 );
 540                     $after_item = 1;
 541                 } elsif (/^=over\s*(.*)/) {             # =over N
 542                     process_over();
 543                 } elsif (/^=back/) {            # =back
 544                     process_back($need_dd);
 545                 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
 546                     process_for($1,$2);
 547                 } else {
 548                     /^=(\S*)\s*/;
 549                     warn "$0: $Podfile: unknown pod directive '$1' in "
 550                        . "paragraph $Paragraph.  ignoring.\n" unless $Quiet;
 551                 }
 552             }
 553             $Top = 0;
 554         }
 555         else {
 556             next if $Ignore;
 557             next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
 558             print HTML and next if @Begin_Stack && $Begin_Stack[-1] eq 'html';
 559             print HTML "<dd>\n" if $need_dd;
 560             my $text = $_;
 561             if( $text =~ /\A\s+/ ){
 562                 process_pre( \$text );
 563                 print HTML "<pre>\n$text</pre>\n";
 564
 565             } else {
 566                 process_text( \$text );
 567
 568                 # experimental: check for a paragraph where all lines
 569                 # have some ...\t...\t...\n pattern
 570                 if( $text =~ /\t/ ){
 571                     my @lines = split( "\n", $text );
 572                     if( @lines > 1 ){
 573                         my $all = 2;
 574                         foreach my $line ( @lines ){
 575                             if( $line =~ /\S/ && $line !~ /\t/ ){
 576                                 $all--;
 577                                 last if $all == 0;
 578                             }
 579                         }
 580                         if( $all > 0 ){
 581                             $text =~ s/\t+/<td>/g;
 582                             $text =~ s/^/<tr><td>/gm;
 583                             $text = '<table cellspacing="0" cellpadding="0">' .
 584                                     $text . '</table>';
 585                         }
 586                     }
 587                 }
 588                 ## end of experimental
 589
 590                 if( $after_item ){
 591                     $After_Lpar = 1;
 592                 }
 593                 print HTML "<p>$text</p>\n";
 594             }
 595             print HTML "</dd>\n" if $need_dd;
 596             $after_item = 0;
 597         }
 598     }
 599
 600     # finish off any pending directives
 601     finish_list();
 602
 603     # link to page index
 604     print HTML "<p><a href=\"#__index__\"><small>$Backlink</small></a></p>\n"
 605         if $Doindex and $index and $Backlink;
 606
 607     print HTML <<END_OF_TAIL;
 608 $block
 609 </body>
 610
 611 </html>
 612 END_OF_TAIL
 613
 614     # close the html file
 615     close(HTML);
 616
 617     warn "Finished\n" if $Verbose;
 618 }
 619
 620 ##############################################################################
 621
 622 sub usage {
 623     my $podfile = shift;
 624     warn "$0: $podfile: @_\n" if @_;
 625     die <<END_OF_USAGE;
 626 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 627            --podpath=<name>:...:<name> --podroot=<name>
 628            --libpods=<name>:...:<name> --recurse --verbose --index
 629            --netscape --norecurse --noindex --cachedir=<name>
 630
 631   --backlink     - set text for "back to top" links (default: none).
 632   --cachedir     - directory for the item and directory cache files.
 633   --css          - stylesheet URL
 634   --flush        - flushes the item and directory caches.
 635   --[no]header   - produce block header/footer (default is no headers).
 636   --help         - prints this message.
 637   --hiddendirs   - search hidden directories in podpath
 638   --htmldir      - directory for resulting HTML files.
 639   --htmlroot     - http-server base directory from which all relative paths
 640                    in podpath stem (default is /).
 641   --[no]index    - generate an index at the top of the resulting html
 642                    (default behaviour).
 643   --infile       - filename for the pod to convert (input taken from stdin
 644                    by default).
 645   --libpods      - colon-separated list of pages to search for =item pod
 646                    directives in as targets of C<> and implicit links (empty
 647                    by default).  note, these are not filenames, but rather
 648                    page names like those that appear in L<> links.
 649   --outfile      - filename for the resulting html file (output sent to
 650                    stdout by default).
 651   --podpath      - colon-separated list of directories containing library
 652                    pods (empty by default).
 653   --podroot      - filesystem base directory from which all relative paths
 654                    in podpath stem (default is .).
 655   --[no]quiet    - suppress some benign warning messages (default is off).
 656   --[no]recurse  - recurse on those subdirectories listed in podpath
 657                    (default behaviour).
 658   --title        - title that will appear in resulting html file.
 659   --[no]verbose  - self-explanatory (off by default).
 660   --[no]netscape - deprecated, has no effect. for backwards compatibility only.
 661
 662 END_OF_USAGE
 663
 664 }
 665
 666 sub parse_command_line {
 667     my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
 668         $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
 669         $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
 670         $opt_recurse,$opt_title,$opt_verbose,$opt_hiddendirs);
 671
 672     unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
 673     my $result = GetOptions(
 674                             'backlink=s' => \$opt_backlink,
 675                             'cachedir=s' => \$opt_cachedir,
 676                             'css=s'      => \$opt_css,
 677                             'flush'      => \$opt_flush,
 678                             'header!'    => \$opt_header,
 679                             'help'       => \$opt_help,
 680                             'hiddendirs!'=> \$opt_hiddendirs,
 681                             'htmldir=s'  => \$opt_htmldir,
 682                             'htmlroot=s' => \$opt_htmlroot,
 683                             'index!'     => \$opt_index,
 684                             'infile=s'   => \$opt_infile,
 685                             'libpods=s'  => \$opt_libpods,
 686                             'netscape!'  => \$opt_netscape,
 687                             'outfile=s'  => \$opt_outfile,
 688                             'podpath=s'  => \$opt_podpath,
 689                             'podroot=s'  => \$opt_podroot,
 690                             'quiet!'     => \$opt_quiet,
 691                             'recurse!'   => \$opt_recurse,
 692                             'title=s'    => \$opt_title,
 693                             'verbose!'   => \$opt_verbose,
 694                            );
 695     usage("-", "invalid parameters") if not $result;
 696
 697     usage("-") if defined $opt_help;    # see if the user asked for help
 698     $opt_help = "";                     # just to make -w shut-up.
 699
 700     @Podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 701     @Libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 702
 703     $Backlink = $opt_backlink if defined $opt_backlink;
 704     $Cachedir = $opt_cachedir if defined $opt_cachedir;
 705     $Css      = $opt_css      if defined $opt_css;
 706     $Header   = $opt_header   if defined $opt_header;
 707     $Htmldir  = $opt_htmldir  if defined $opt_htmldir;
 708     $Htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 709     $Doindex  = $opt_index    if defined $opt_index;
 710     $Podfile  = $opt_infile   if defined $opt_infile;
 711     $HiddenDirs = $opt_hiddendirs if defined $opt_hiddendirs;
 712     $Htmlfile = $opt_outfile  if defined $opt_outfile;
 713     $Podroot  = $opt_podroot  if defined $opt_podroot;
 714     $Quiet    = $opt_quiet    if defined $opt_quiet;
 715     $Recurse  = $opt_recurse  if defined $opt_recurse;
 716     $Title    = $opt_title    if defined $opt_title;
 717     $Verbose  = $opt_verbose  if defined $opt_verbose;
 718
 719     warn "Flushing item and directory caches\n"
 720         if $opt_verbose && defined $opt_flush;
 721     $Dircache = "$Cachedir/pod2htmd.tmp";
 722     $Itemcache = "$Cachedir/pod2htmi.tmp";
 723     if (defined $opt_flush) {
 724         1 while unlink($Dircache, $Itemcache);
 725     }
 726 }
 727
 728
 729 my $Saved_Cache_Key;
 730
 731 sub get_cache {
 732     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 733     my @cache_key_args = @_;
 734
 735     # A first-level cache:
 736     # Don't bother reading the cache files if they still apply
 737     # and haven't changed since we last read them.
 738
 739     my $this_cache_key = cache_key(@cache_key_args);
 740
 741     return if $Saved_Cache_Key and $this_cache_key eq $Saved_Cache_Key;
 742
 743     # load the cache of %Pages and %Items if possible.  $tests will be
 744     # non-zero if successful.
 745     my $tests = 0;
 746     if (-f $dircache && -f $itemcache) {
 747         warn "scanning for item cache\n" if $Verbose;
 748         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 749     }
 750
 751     # if we didn't succeed in loading the cache then we must (re)build
 752     #  %Pages and %Items.
 753     if (!$tests) {
 754         warn "scanning directories in pod-path\n" if $Verbose;
 755         scan_podpath($podroot, $recurse, 0);
 756     }
 757     $Saved_Cache_Key = cache_key(@cache_key_args);
 758 }
 759
 760 sub cache_key {
 761     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 762     return join('!', $dircache, $itemcache, $recurse,
 763         @$podpath, $podroot, stat($dircache), stat($itemcache));
 764 }
 765
 766 #
 767 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 768 #  are valid caches of %Pages and %Items.  if they are valid then it loads
 769 #  them and returns a non-zero value.
 770 #
 771 sub load_cache {
 772     my($dircache, $itemcache, $podpath, $podroot) = @_;
 773     my($tests);
 774     local $_;
 775
 776     $tests = 0;
 777
 778     open(CACHE, "<$itemcache") ||
 779         die "$0: error opening $itemcache for reading: $!\n";
 780     $/ = "\n";
 781
 782     # is it the same podpath?
 783     $_ = <CACHE>;
 784     chomp($_);
 785     $tests++ if (join(":", @$podpath) eq $_);
 786
 787     # is it the same podroot?
 788     $_ = <CACHE>;
 789     chomp($_);
 790     $tests++ if ($podroot eq $_);
 791
 792     # load the cache if its good
 793     if ($tests != 2) {
 794         close(CACHE);
 795         return 0;
 796     }
 797
 798     warn "loading item cache\n" if $Verbose;
 799     while (<CACHE>) {
 800         /(.*?) (.*)$/;
 801         $Items{$1} = $2;
 802     }
 803     close(CACHE);
 804
 805     warn "scanning for directory cache\n" if $Verbose;
 806     open(CACHE, "<$dircache") ||
 807         die "$0: error opening $dircache for reading: $!\n";
 808     $/ = "\n";
 809     $tests = 0;
 810
 811     # is it the same podpath?
 812     $_ = <CACHE>;
 813     chomp($_);
 814     $tests++ if (join(":", @$podpath) eq $_);
 815
 816     # is it the same podroot?
 817     $_ = <CACHE>;
 818     chomp($_);
 819     $tests++ if ($podroot eq $_);
 820
 821     # load the cache if its good
 822     if ($tests != 2) {
 823         close(CACHE);
 824         return 0;
 825     }
 826
 827     warn "loading directory cache\n" if $Verbose;
 828     while (<CACHE>) {
 829         /(.*?) (.*)$/;
 830         $Pages{$1} = $2;
 831     }
 832
 833     close(CACHE);
 834
 835     return 1;
 836 }
 837
 838 #
 839 # scan_podpath - scans the directories specified in @podpath for directories,
 840 #  .pod files, and .pm files.  it also scans the pod files specified in
 841 #  @Libpods for =item directives.
 842 #
 843 sub scan_podpath {
 844     my($podroot, $recurse, $append) = @_;
 845     my($pwd, $dir);
 846     my($libpod, $dirname, $pod, @files, @poddata);
 847
 848     unless($append) {
 849         %Items = ();
 850         %Pages = ();
 851     }
 852
 853     # scan each directory listed in @Podpath
 854     $pwd = getcwd();
 855     chdir($podroot)
 856         || die "$0: error changing to directory $podroot: $!\n";
 857     foreach $dir (@Podpath) {
 858         scan_dir($dir, $recurse);
 859     }
 860
 861     # scan the pods listed in @Libpods for =item directives
 862     foreach $libpod (@Libpods) {
 863         # if the page isn't defined then we won't know where to find it
 864         # on the system.
 865         next unless defined $Pages{$libpod} && $Pages{$libpod};
 866
 867         # if there is a directory then use the .pod and .pm files within it.
 868         # NOTE: Only finds the first so-named directory in the tree.
 869 #       if ($Pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 870         if ($Pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
 871             #  find all the .pod and .pm files within the directory
 872             $dirname = $1;
 873             opendir(DIR, $dirname) ||
 874                 die "$0: error opening directory $dirname: $!\n";
 875             @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
 876             closedir(DIR);
 877
 878             # scan each .pod and .pm file for =item directives
 879             foreach $pod (@files) {
 880                 open(POD, "<$dirname/$pod") ||
 881                     die "$0: error opening $dirname/$pod for input: $!\n";
 882                 @poddata = <POD>;
 883                 close(POD);
 884                 clean_data( \@poddata );
 885
 886                 scan_items( \%Items, "$dirname/$pod", @poddata);
 887             }
 888
 889             # use the names of files as =item directives too.
 890 ### Don't think this should be done this way - confuses issues.(WL)
 891 ###         foreach $pod (@files) {
 892 ###             $pod =~ /^(.*)(\.pod|\.pm)$/;
 893 ###             $Items{$1} = "$dirname/$1.html" if $1;
 894 ###         }
 895         } elsif ($Pages{$libpod} =~ /([^:]*\.pod):/ ||
 896                  $Pages{$libpod} =~ /([^:]*\.pm):/) {
 897             # scan the .pod or .pm file for =item directives
 898             $pod = $1;
 899             open(POD, "<$pod") ||
 900                 die "$0: error opening $pod for input: $!\n";
 901             @poddata = <POD>;
 902             close(POD);
 903             clean_data( \@poddata );
 904
 905             scan_items( \%Items, "$pod", @poddata);
 906         } else {
 907             warn "$0: shouldn't be here (line ".__LINE__."\n" unless $Quiet;
 908         }
 909     }
 910     @poddata = ();      # clean-up a bit
 911
 912     chdir($pwd)
 913         || die "$0: error changing to directory $pwd: $!\n";
 914
 915     # cache the item list for later use
 916     warn "caching items for later use\n" if $Verbose;
 917     open(CACHE, ">$Itemcache") ||
 918         die "$0: error open $Itemcache for writing: $!\n";
 919
 920     print CACHE join(":", @Podpath) . "\n$podroot\n";
 921     foreach my $key (keys %Items) {
 922         print CACHE "$key $Items{$key}\n";
 923     }
 924
 925     close(CACHE);
 926
 927     # cache the directory list for later use
 928     warn "caching directories for later use\n" if $Verbose;
 929     open(CACHE, ">$Dircache") ||
 930         die "$0: error open $Dircache for writing: $!\n";
 931
 932     print CACHE join(":", @Podpath) . "\n$podroot\n";
 933     foreach my $key (keys %Pages) {
 934         print CACHE "$key $Pages{$key}\n";
 935     }
 936
 937     close(CACHE);
 938 }
 939
 940 #
 941 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 942 #  files, and .pm files.  notes those that it finds.  this information will
 943 #  be used later in order to figure out where the pages specified in L<>
 944 #  links are on the filesystem.
 945 #
 946 sub scan_dir {
 947     my($dir, $recurse) = @_;
 948     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 949     local $_;
 950
 951     @subdirs = ();
 952     @pods = ();
 953
 954     opendir(DIR, $dir) ||
 955         die "$0: error opening directory $dir: $!\n";
 956     while (defined($_ = readdir(DIR))) {
 957         if (-d "$dir/$_" && $_ ne "." && $_ ne ".."
 958             && ($HiddenDirs || !/^\./)
 959         ) {         # directory
 960             $Pages{$_}  = "" unless defined $Pages{$_};
 961             $Pages{$_} .= "$dir/$_:";
 962             push(@subdirs, $_);
 963         } elsif (/\.pod\z/) {                               # .pod
 964             s/\.pod\z//;
 965             $Pages{$_}  = "" unless defined $Pages{$_};
 966             $Pages{$_} .= "$dir/$_.pod:";
 967             push(@pods, "$dir/$_.pod");
 968         } elsif (/\.html\z/) {                              # .html
 969             s/\.html\z//;
 970             $Pages{$_}  = "" unless defined $Pages{$_};
 971             $Pages{$_} .= "$dir/$_.pod:";
 972         } elsif (/\.pm\z/) {                                # .pm
 973             s/\.pm\z//;
 974             $Pages{$_}  = "" unless defined $Pages{$_};
 975             $Pages{$_} .= "$dir/$_.pm:";
 976             push(@pods, "$dir/$_.pm");
 977         } elsif (-T "$dir/$_") {                            # script(?)
 978             local *F;
 979             if (open(F, "$dir/$_")) {
 980                 my $line;
 981                 while (defined($line = <F>)) {
 982                     if ($line =~ /^=(?:pod|head1)/) {
 983                         $Pages{$_}  = "" unless defined $Pages{$_};
 984                         $Pages{$_} .= "$dir/$_.pod:";
 985                         last;
 986                     }
 987                 }
 988                 close(F);
 989             }
 990         }
 991     }
 992     closedir(DIR);
 993
 994     # recurse on the subdirectories if necessary
 995     if ($recurse) {
 996         foreach my $subdir (@subdirs) {
 997             scan_dir("$dir/$subdir", $recurse);
 998         }
 999     }
1000 }
1001
1002 #
1003 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
1004 #  build an index.
1005 #
1006 sub scan_headings {
1007     my($sections, @data) = @_;
1008     my($tag, $which_head, $otitle, $listdepth, $index);
1009
1010     local $Ignore = 0;
1011
1012     $listdepth = 0;
1013     $index = "";
1014
1015     # scan for =head directives, note their name, and build an index
1016     #  pointing to each of them.
1017     foreach my $line (@data) {
1018       if ($line =~ /^=(head)([1-6])\s+(.*)/) {
1019         ($tag, $which_head, $otitle) = ($1,$2,$3);
1020
1021         my $title = depod( $otitle );
1022         my $name = anchorify( $title );
1023         $$sections{$name} = 1;
1024         $title = process_text( \$otitle );
1025
1026             while ($which_head != $listdepth) {
1027                 if ($which_head > $listdepth) {
1028                     $index .= "\n" . ("\t" x $listdepth) . "<ul>\n";
1029                     $listdepth++;
1030                 } elsif ($which_head < $listdepth) {
1031                     $listdepth--;
1032                     $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1033                 }
1034             }
1035
1036             $index .= "\n" . ("\t" x $listdepth) . "<li>" .
1037                       "<a href=\"#" . $name . "\">" .
1038                       $title . "</a></li>";
1039         }
1040     }
1041
1042     # finish off the lists
1043     while ($listdepth--) {
1044         $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1045     }
1046
1047     # get rid of bogus lists
1048     $index =~ s,\t*<ul>\s*</ul>\n,,g;
1049
1050     return $index;
1051 }
1052
1053 #
1054 # scan_items - scans the pod specified by $pod for =item directives.  we
1055 #  will use this information later on in resolving C<> links.
1056 #
1057 sub scan_items {
1058     my( $itemref, $pod, @poddata ) = @_;
1059     my($i, $item);
1060     local $_;
1061
1062     $pod =~ s/\.pod\z//;
1063     $pod .= ".html" if $pod;
1064
1065     foreach $i (0..$#poddata) {
1066         my $txt = depod( $poddata[$i] );
1067
1068         # figure out what kind of item it is.
1069         # Build string for referencing this item.
1070         if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
1071             next unless $1;
1072             $item = $1;
1073         } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
1074             $item = $1;
1075         } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
1076             $item = $1;
1077         } else {
1078             next;
1079         }
1080         my $fid = fragment_id( $item );
1081         $$itemref{$fid} = "$pod" if $fid;
1082     }
1083 }
1084
1085 #
1086 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
1087 #
1088 sub process_head {
1089     my($tag, $heading, $hasindex) = @_;
1090
1091     # figure out the level of the =head
1092     $tag =~ /head([1-6])/;
1093     my $level = $1;
1094
1095     if( $Listlevel ){
1096         warn "$0: $Podfile: unterminated list at =head in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1097         while( $Listlevel ){
1098             process_back();
1099         }
1100     }
1101
1102     print HTML "<p>\n";
1103     if( $level == 1 && ! $Top ){
1104       print HTML "<a href=\"#__index__\"><small>$Backlink</small></a>\n"
1105         if $hasindex and $Backlink;
1106       print HTML "</p>\n<hr />\n"
1107     } else {
1108       print HTML "</p>\n";
1109     }
1110
1111     my $name = anchorify( depod( $heading ) );
1112     my $convert = process_text( \$heading );
1113     print HTML "<h$level><a name=\"$name\">$convert</a></h$level>\n";
1114 }
1115
1116
1117 #
1118 # emit_item_tag - print an =item's text
1119 # Note: The global $EmittedItem is used for inhibiting self-references.
1120 #
1121 my $EmittedItem;
1122
1123 sub emit_item_tag($$$){
1124     my( $otext, $text, $compact ) = @_;
1125     my $item = fragment_id( $text );
1126
1127     $EmittedItem = $item;
1128     ### print STDERR "emit_item_tag=$item ($text)\n";
1129
1130     print HTML '<strong>';
1131     if ($Items_Named{$item}++) {
1132         print HTML process_text( \$otext );
1133     } else {
1134         my $name = 'item_' . $item;
1135         $name = anchorify($name);
1136         print HTML qq{<a name="$name">}, process_text( \$otext ), '</a>';
1137     }
1138     print HTML "</strong>\n";
1139     undef( $EmittedItem );
1140 }
1141
1142 sub emit_li {
1143     my( $tag ) = @_;
1144     if( $Items_Seen[$Listlevel]++ == 0 ){
1145         push( @Listend, "</$tag>" );
1146         print HTML "<$tag>\n";
1147     }
1148     my $emitted = $tag eq 'dl' ? 'dt' : 'li';
1149     print HTML "<$emitted>";
1150     return $emitted;
1151 }
1152
1153 #
1154 # process_item - convert a pod item tag and convert it to HTML format.
1155 #
1156 sub process_item {
1157     my( $otext ) = @_;
1158     my $need_dd = 0; # set to 1 if we need a <dd></dd> after an item
1159
1160     # lots of documents start a list without doing an =over.  this is
1161     # bad!  but, the proper thing to do seems to be to just assume
1162     # they did do an =over.  so warn them once and then continue.
1163     if( $Listlevel == 0 ){
1164         warn "$0: $Podfile: unexpected =item directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1165         process_over();
1166     }
1167
1168     # formatting: insert a paragraph if preceding item has >1 paragraph
1169     if( $After_Lpar ){
1170         print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar;
1171         $After_Lpar = 0;
1172     }
1173
1174     # remove formatting instructions from the text
1175     my $text = depod( $otext );
1176
1177     my $emitted; # the tag actually emitted, used for closing
1178
1179     # all the list variants:
1180     if( $text =~ /\A\*/ ){ # bullet
1181         $emitted = emit_li( 'ul' );
1182         if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1183             my $tag = $1;
1184             $otext =~ s/\A\*\s+//;
1185             emit_item_tag( $otext, $tag, 1 );
1186         }
1187
1188     } elsif( $text =~ /\A\d+/ ){ # numbered list
1189         $emitted = emit_li( 'ol' );
1190         if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1191             my $tag = $1;
1192             $otext =~ s/\A\d+\.?\s*//;
1193             emit_item_tag( $otext, $tag, 1 );
1194         }
1195
1196     } else {                    # definition list
1197         $emitted = emit_li( 'dl' );
1198         if ($text =~ /\A(.+)\Z/s ){ # should have text
1199             emit_item_tag( $otext, $text, 1 );
1200         }
1201         $need_dd = 1;
1202     }
1203     print HTML "\n";
1204     return $need_dd;
1205 }
1206
1207 #
1208 # process_over - process a pod over tag and start a corresponding HTML list.
1209 #
1210 sub process_over {
1211     # start a new list
1212     $Listlevel++;
1213     push( @Items_Seen, 0 );
1214     $After_Lpar = 0;
1215 }
1216
1217 #
1218 # process_back - process a pod back tag and convert it to HTML format.
1219 #
1220 sub process_back {
1221     my $need_dd = shift;
1222     if( $Listlevel == 0 ){
1223         warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1224         return;
1225     }
1226
1227     # close off the list.  note, I check to see if $Listend[$Listlevel] is
1228     # defined because an =item directive may have never appeared and thus
1229     # $Listend[$Listlevel] may have never been initialized.
1230     $Listlevel--;
1231     if( defined $Listend[$Listlevel] ){
1232         print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar;
1233         print HTML $Listend[$Listlevel];
1234         print HTML "\n";
1235         pop( @Listend );
1236     }
1237     $After_Lpar = 0;
1238
1239     # clean up item count
1240     pop( @Items_Seen );
1241 }
1242
1243 #
1244 # process_cut - process a pod cut tag, thus start ignoring pod directives.
1245 #
1246 sub process_cut {
1247     $Ignore = 1;
1248 }
1249
1250 #
1251 # process_pod - process a pod tag, thus stop ignoring pod directives
1252 # until we see a corresponding cut.
1253 #
1254 sub process_pod {
1255     # no need to set $Ignore to 0 cause the main loop did it
1256 }
1257
1258 #
1259 # process_for - process a =for pod tag.  if it's for html, spit
1260 # it out verbatim, if illustration, center it, otherwise ignore it.
1261 #
1262 sub process_for {
1263     my($whom, $text) = @_;
1264     if ( $whom =~ /^(pod2)?html$/i) {
1265         print HTML $text;
1266     } elsif ($whom =~ /^illustration$/i) {
1267         1 while chomp $text;
1268         for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1269           $text .= $ext, last if -r "$text$ext";
1270         }
1271         print HTML qq{<p align="center"><img src="$text" alt="$text illustration" /></p>};
1272     }
1273 }
1274
1275 #
1276 # process_begin - process a =begin pod tag.  this pushes
1277 # whom we're beginning on the begin stack.  if there's a
1278 # begin stack, we only print if it us.
1279 #
1280 sub process_begin {
1281     my($whom, $text) = @_;
1282     $whom = lc($whom);
1283     push (@Begin_Stack, $whom);
1284     if ( $whom =~ /^(pod2)?html$/) {
1285         print HTML $text if $text;
1286     }
1287 }
1288
1289 #
1290 # process_end - process a =end pod tag.  pop the
1291 # begin stack.  die if we're mismatched.
1292 #
1293 sub process_end {
1294     my($whom, $text) = @_;
1295     $whom = lc($whom);
1296     if ($Begin_Stack[-1] ne $whom ) {
1297         die "Unmatched begin/end at chunk $Paragraph\n"
1298     }
1299     pop( @Begin_Stack );
1300 }
1301
1302 #
1303 # process_pre - indented paragraph, made into <pre></pre>
1304 #
1305 sub process_pre {
1306     my( $text ) = @_;
1307     my( $rest );
1308     return if $Ignore;
1309
1310     $rest = $$text;
1311
1312     # insert spaces in place of tabs
1313     $rest =~ s#(.+)#
1314             my $line = $1;
1315             1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e;
1316             $line;
1317         #eg;
1318
1319     # convert some special chars to HTML escapes
1320     $rest = html_escape($rest);
1321
1322     # try and create links for all occurrences of perl.* within
1323     # the preformatted text.
1324     $rest =~ s{
1325                  (\s*)(perl\w+)
1326               }{
1327                  if ( defined $Pages{$2} ){     # is a link
1328                      qq($1<a href="$Htmlroot/$Pages{$2}">$2</a>);
1329                  } elsif (defined $Pages{dosify($2)}) { # is a link
1330                      qq($1<a href="$Htmlroot/$Pages{dosify($2)}">$2</a>);
1331                  } else {
1332                      "$1$2";
1333                  }
1334               }xeg;
1335      $rest =~ s{
1336                  (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1337                }{
1338                   my $url ;
1339                   if ( $Htmlfileurl ne '' ){
1340                      # Here, we take advantage of the knowledge
1341                      # that $Htmlfileurl ne '' implies $Htmlroot eq ''.
1342                      # Since $Htmlroot eq '', we need to prepend $Htmldir
1343                      # on the fron of the link to get the absolute path
1344                      # of the link's target. We check for a leading '/'
1345                      # to avoid corrupting links that are #, file:, etc.
1346                      my $old_url = $3 ;
1347                      $old_url = "$Htmldir$old_url" if $old_url =~ m{^\/};
1348                      $url = relativize_url( "$old_url.html", $Htmlfileurl );
1349                   } else {
1350                      $url = "$3.html" ;
1351                   }
1352                   "$1$url" ;
1353                }xeg;
1354
1355     # Look for embedded URLs and make them into links.  We don't
1356     # relativize them since they are best left as the author intended.
1357
1358     my $urls = '(' . join ('|', qw{
1359                 http
1360                 telnet
1361                 mailto
1362                 news
1363                 gopher
1364                 file
1365                 wais
1366                 ftp
1367             } )
1368         . ')';
1369
1370     my $ltrs = '\w';
1371     my $gunk = '/#~:.?+=&%@!\-';
1372     my $punc = '.:!?\-;';
1373     my $any  = "${ltrs}${gunk}${punc}";
1374
1375     $rest =~ s{
1376         \b                      # start at word boundary
1377         (                       # begin $1  {
1378             $urls :             # need resource and a colon
1379             (?!:)               # Ignore File::, among others.
1380             [$any] +?           # followed by one or more of any valid
1381                                 #   character, but be conservative and
1382                                 #   take only what you need to....
1383         )                       # end   $1  }
1384         (?=
1385             &quot; &gt;         # maybe pre-quoted '<a href="...">'
1386         |                       # or:
1387             [$punc]*            # 0 or more punctuation
1388             (?:                 #   followed
1389                 [^$any]         #   by a non-url char
1390             |                   #   or
1391                 $               #   end of the string
1392             )                   #
1393         |                       # or else
1394             $                   #   then end of the string
1395         )
1396       }{<a href="$1">$1</a>}igox;
1397
1398     # text should be as it is (verbatim)
1399     $$text = $rest;
1400 }
1401
1402
1403 #
1404 # pure text processing
1405 #
1406 # pure_text/inIS_text: differ with respect to automatic C<> recognition.
1407 # we don't want this to happen within IS
1408 #
1409 sub pure_text($){
1410     my $text = shift();
1411     process_puretext( $text, \$PTQuote, 1 );
1412 }
1413
1414 sub inIS_text($){
1415     my $text = shift();
1416     process_puretext( $text, \$PTQuote, 0 );
1417 }
1418
1419 #
1420 # process_puretext - process pure text (without pod-escapes) converting
1421 #  double-quotes and handling implicit C<> links.
1422 #
1423 sub process_puretext {
1424     my($text, $quote, $notinIS) = @_;
1425
1426     ## Guessing at func() or [\$\@%&]*var references in plain text is destined
1427     ## to produce some strange looking ref's. uncomment to disable:
1428     ## $notinIS = 0;
1429
1430     my(@words, $lead, $trail);
1431
1432     # convert double-quotes to single-quotes
1433     if( $$quote && $text =~ s/"/''/s ){
1434         $$quote = 0;
1435     }
1436     while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1437     $$quote = 1 if $text =~ s/"/``/s;
1438
1439     # keep track of leading and trailing white-space
1440     $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");
1441     $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
1442
1443     # split at space/non-space boundaries
1444     @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
1445
1446     # process each word individually
1447     foreach my $word (@words) {
1448         # skip space runs
1449         next if $word =~ /^\s*$/;
1450         # see if we can infer a link
1451         if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {
1452             # has parenthesis so should have been a C<> ref
1453             ## try for a pagename (perlXXX(1))?
1454             my( $func, $args ) = ( $1, $2 );
1455             if( $args =~ /^\d+$/ ){
1456                 my $url = page_sect( $word, '' );
1457                 if( defined $url ){
1458                     $word = "<a href=\"$url\">the $word manpage</a>";
1459                     next;
1460                 }
1461             }
1462             ## try function name for a link, append tt'ed argument list
1463             $word = emit_C( $func, '', "($args)");
1464
1465 #### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1466 ##      } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1467 ##          # perl variables, should be a C<> ref
1468 ##          $word = emit_C( $word );
1469
1470         } elsif ($word =~ m,^\w+://\w,) {
1471             # looks like a URL
1472             # Don't relativize it: leave it as the author intended
1473             $word = qq(<a href="$word">$word</a>);
1474         } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1475             # looks like an e-mail address
1476             my ($w1, $w2, $w3) = ("", $word, "");
1477             ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1478             ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1479             $word = qq($w1<a href="mailto:$w2">$w2</a>$w3);
1480         } else {
1481             $word = html_escape($word) if $word =~ /["&<>]/;
1482         }
1483     }
1484
1485     # put everything back together
1486     return $lead . join( '', @words ) . $trail;
1487 }
1488
1489
1490 #
1491 # process_text - handles plaintext that appears in the input pod file.
1492 # there may be pod commands embedded within the text so those must be
1493 # converted to html commands.
1494 #
1495
1496 sub process_text1($$;$$);
1497 sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
1498 sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
1499
1500 sub process_text {
1501     return if $Ignore;
1502     my( $tref ) = @_;
1503     my $res = process_text1( 0, $tref );
1504     $res =~ s/\s+$//s;
1505     $$tref = $res;
1506 }
1507
1508 sub process_text1($$;$$){
1509     my( $lev, $rstr, $func, $closing ) = @_;
1510     my $res = '';
1511
1512     unless (defined $func) {
1513         $func = '';
1514         $lev++;
1515     }
1516
1517     if( $func eq 'B' ){
1518         # B<text> - boldface
1519         $res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>';
1520
1521     } elsif( $func eq 'C' ){
1522         # C<code> - can be a ref or <code></code>
1523         # need to extract text
1524         my $par = go_ahead( $rstr, 'C', $closing );
1525
1526         ## clean-up of the link target
1527         my $text = depod( $par );
1528
1529         ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1530         ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1531
1532         $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1533
1534     } elsif( $func eq 'E' ){
1535         # E<x> - convert to character
1536         $$rstr =~ s/^([^>]*)>//;
1537         my $escape = $1;
1538         $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1539         $res = "&$escape;";
1540
1541     } elsif( $func eq 'F' ){
1542         # F<filename> - italizice
1543         $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1544
1545     } elsif( $func eq 'I' ){
1546         # I<text> - italizice
1547         $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1548
1549     } elsif( $func eq 'L' ){
1550         # L<link> - link
1551         ## L<text|cross-ref> => produce text, use cross-ref for linking
1552         ## L<cross-ref> => make text from cross-ref
1553         ## need to extract text
1554         my $par = go_ahead( $rstr, 'L', $closing );
1555
1556         # some L<>'s that shouldn't be:
1557         # a) full-blown URL's are emitted as-is
1558         if( $par =~ m{^\w+://}s ){
1559             return make_URL_href( $par );
1560         }
1561         # b) C<...> is stripped and treated as C<>
1562         if( $par =~ /^C<(.*)>$/ ){
1563             my $text = depod( $1 );
1564             return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1565         }
1566
1567         # analyze the contents
1568         $par =~ s/\n/ /g;   # undo word-wrapped tags
1569         my $opar = $par;
1570         my $linktext;
1571         if( $par =~ s{^([^|]+)\|}{} ){
1572             $linktext = $1;
1573         }
1574
1575         # make sure sections start with a /
1576         $par =~ s{^"}{/"};
1577
1578         my( $page, $section, $ident );
1579
1580         # check for link patterns
1581         if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){     # name/ident
1582             # we've got a name/ident (no quotes)
1583             ( $page, $ident ) = ( $1, $2 );
1584             ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1585
1586         } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1587             # even though this should be a "section", we go for ident first
1588             ( $page, $ident ) = ( $1, $2 );
1589             ### print STDERR "--> L<$par> to page $page, section $section\n";
1590
1591         } elsif( $par =~ /\s/ ){  # this must be a section with missing quotes
1592             ( $page, $section ) = ( '', $par );
1593             ### print STDERR "--> L<$par> to void page, section $section\n";
1594
1595         } else {
1596             ( $page, $section ) = ( $par, '' );
1597             ### print STDERR "--> L<$par> to page $par, void section\n";
1598         }
1599
1600         # now, either $section or $ident is defined. the convoluted logic
1601         # below tries to resolve L<> according to what the user specified.
1602         # failing this, we try to find the next best thing...
1603         my( $url, $ltext, $fid );
1604
1605         RESOLVE: {
1606             if( defined $ident ){
1607                 ## try to resolve $ident as an item
1608                 ( $url, $fid ) = coderef( $page, $ident );
1609                 if( $url ){
1610                     if( ! defined( $linktext ) ){
1611                         $linktext = $ident;
1612                         $linktext .= " in " if $ident && $page;
1613                         $linktext .= "the $page manpage" if $page;
1614                     }
1615                     ###  print STDERR "got coderef url=$url\n";
1616                     last RESOLVE;
1617                 }
1618                 ## no luck: go for a section (auto-quoting!)
1619                 $section = $ident;
1620             }
1621             ## now go for a section
1622             my $htmlsection = htmlify( $section );
1623             $url = page_sect( $page, $htmlsection );
1624             if( $url ){
1625                 if( ! defined( $linktext ) ){
1626                     $linktext = $section;
1627                     $linktext .= " in " if $section && $page;
1628                     $linktext .= "the $page manpage" if $page;
1629                 }
1630                 ### print STDERR "got page/section url=$url\n";
1631                 last RESOLVE;
1632             }
1633             ## no luck: go for an ident
1634             if( $section ){
1635                 $ident = $section;
1636             } else {
1637                 $ident = $page;
1638                 $page  = undef();
1639             }
1640             ( $url, $fid ) = coderef( $page, $ident );
1641             if( $url ){
1642                 if( ! defined( $linktext ) ){
1643                     $linktext = $ident;
1644                     $linktext .= " in " if $ident && $page;
1645                     $linktext .= "the $page manpage" if $page;
1646                 }
1647                 ### print STDERR "got section=>coderef url=$url\n";
1648                 last RESOLVE;
1649             }
1650
1651             # warning; show some text.
1652             $linktext = $opar unless defined $linktext;
1653             warn "$0: $Podfile: cannot resolve L<$opar> in paragraph $Paragraph.\n" unless $Quiet;
1654         }
1655
1656         # now we have a URL or just plain code
1657         $$rstr = $linktext . '>' . $$rstr;
1658         if( defined( $url ) ){
1659             $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>';
1660         } else {
1661             $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1662         }
1663
1664     } elsif( $func eq 'S' ){
1665         # S<text> - non-breaking spaces
1666         $res = process_text1( $lev, $rstr );
1667         $res =~ s/ /&nbsp;/g;
1668
1669     } elsif( $func eq 'X' ){
1670         # X<> - ignore
1671         $$rstr =~ s/^[^>]*>//;
1672
1673     } elsif( $func eq 'Z' ){
1674         # Z<> - empty
1675         warn "$0: $Podfile: invalid X<> in paragraph $Paragraph.\n"
1676             unless $$rstr =~ s/^>// or $Quiet;
1677
1678     } else {
1679         my $term = pattern $closing;
1680         while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
1681             # all others: either recurse into new function or
1682             # terminate at closing angle bracket(s)
1683             my $pt = $1;
1684             $pt .= $2 if !$3 &&  $lev == 1;
1685             $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1686             return $res if !$3 && $lev > 1;
1687             if( $3 ){
1688                 $res .= process_text1( $lev, $rstr, $3, closing $4 );
1689             }
1690         }
1691         if( $lev == 1 ){
1692             $res .= pure_text( $$rstr );
1693         } else {
1694             warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph.\n" unless $Quiet;
1695         }
1696     }
1697     return $res;
1698 }
1699
1700 #
1701 # go_ahead: extract text of an IS (can be nested)
1702 #
1703 sub go_ahead($$$){
1704     my( $rstr, $func, $closing ) = @_;
1705     my $res = '';
1706     my @closing = ($closing);
1707     while( $$rstr =~
1708       s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
1709         $res .= $1;
1710         unless( $3 ){
1711             shift @closing;
1712             return $res unless @closing;
1713         } else {
1714             unshift @closing, closing $4;
1715         }
1716         $res .= $2;
1717     }
1718     warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph.\n" unless $Quiet;
1719     return $res;
1720 }
1721
1722 #
1723 # emit_C - output result of C<text>
1724 #    $text is the depod-ed text
1725 #
1726 sub emit_C($;$$){
1727     my( $text, $nocode, $args ) = @_;
1728     $args = '' unless defined $args;
1729     my $res;
1730     my( $url, $fid ) = coderef( undef(), $text );
1731
1732     # need HTML-safe text
1733     my $linktext = html_escape( "$text$args" );
1734
1735     if( defined( $url ) &&
1736         (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1737         $res = "<a href=\"$url\"><code>$linktext</code></a>";
1738     } elsif( 0 && $nocode ){
1739         $res = $linktext;
1740     } else {
1741         $res = "<code>$linktext</code>";
1742     }
1743     return $res;
1744 }
1745
1746 #
1747 # html_escape: make text safe for HTML
1748 #
1749 sub html_escape {
1750     my $rest = $_[0];
1751     $rest   =~ s/&/&amp;/g;
1752     $rest   =~ s/</&lt;/g;
1753     $rest   =~ s/>/&gt;/g;
1754     $rest   =~ s/"/&quot;/g;
1755     # &apos; is only in XHTML, not HTML4.  Be conservative
1756     #$rest   =~ s/'/&apos;/g;
1757     return $rest;
1758 }
1759
1760
1761 #
1762 # dosify - convert filenames to 8.3
1763 #
1764 sub dosify {
1765     my($str) = @_;
1766     return lc($str) if $^O eq 'VMS';     # VMS just needs casing
1767     if ($Is83) {
1768         $str = lc $str;
1769         $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1770         $str =~ s/(\w+)/substr ($1,0,8)/ge;
1771     }
1772     return $str;
1773 }
1774
1775 #
1776 # page_sect - make a URL from the text of a L<>
1777 #
1778 sub page_sect($$) {
1779     my( $page, $section ) = @_;
1780     my( $linktext, $page83, $link);     # work strings
1781
1782     # check if we know that this is a section in this page
1783     if (!defined $Pages{$page} && defined $Sections{$page}) {
1784         $section = $page;
1785         $page = "";
1786         ### print STDERR "reset page='', section=$section\n";
1787     }
1788
1789     $page83=dosify($page);
1790     $page=$page83 if (defined $Pages{$page83});
1791     if ($page eq "") {
1792         $link = "#" . anchorify( $section );
1793     } elsif ( $page =~ /::/ ) {
1794         $page =~ s,::,/,g;
1795         # Search page cache for an entry keyed under the html page name,
1796         # then look to see what directory that page might be in.  NOTE:
1797         # this will only find one page. A better solution might be to produce
1798         # an intermediate page that is an index to all such pages.
1799         my $page_name = $page ;
1800         $page_name =~ s,^.*/,,s ;
1801         if ( defined( $Pages{ $page_name } ) &&
1802              $Pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1803            ) {
1804             $page = $1 ;
1805         }
1806         else {
1807             # NOTE: This branch assumes that all A::B pages are located in
1808             # $Htmlroot/A/B.html . This is often incorrect, since they are
1809             # often in $Htmlroot/lib/A/B.html or such like. Perhaps we could
1810             # analyze the contents of %Pages and figure out where any
1811             # cousins of A::B are, then assume that.  So, if A::B isn't found,
1812             # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1813             # lib/A/B.pm. This is also limited, but it's an improvement.
1814             # Maybe a hints file so that the links point to the correct places
1815             # nonetheless?
1816
1817         }
1818         $link = "$Htmlroot/$page.html";
1819         $link .= "#" . anchorify( $section ) if ($section);
1820     } elsif (!defined $Pages{$page}) {
1821         $link = "";
1822     } else {
1823         $section = anchorify( $section ) if $section ne "";
1824         ### print STDERR "...section=$section\n";
1825
1826         # if there is a directory by the name of the page, then assume that an
1827         # appropriate section will exist in the subdirectory
1828 #       if ($section ne "" && $Pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1829         if ($section ne "" && $Pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
1830             $link = "$Htmlroot/$1/$section.html";
1831             ### print STDERR "...link=$link\n";
1832
1833         # since there is no directory by the name of the page, the section will
1834         # have to exist within a .html of the same name.  thus, make sure there
1835         # is a .pod or .pm that might become that .html
1836         } else {
1837             $section = "#$section" if $section;
1838             ### print STDERR "...section=$section\n";
1839
1840             # check if there is a .pod with the page name.
1841             # for L<Foo>, Foo.(pod|pm) is preferred to A/Foo.(pod|pm)
1842             if ($Pages{$page} =~ /([^:]*)\.(?:pod|pm):/) {
1843                 $link = "$Htmlroot/$1.html$section";
1844             } else {
1845                 $link = "";
1846             }
1847         }
1848     }
1849
1850     if ($link) {
1851         # Here, we take advantage of the knowledge that $Htmlfileurl ne ''
1852         # implies $Htmlroot eq ''. This means that the link in question
1853         # needs a prefix of $Htmldir if it begins with '/'. The test for
1854         # the initial '/' is done to avoid '#'-only links, and to allow
1855         # for other kinds of links, like file:, ftp:, etc.
1856         my $url ;
1857         if (  $Htmlfileurl ne '' ) {
1858             $link = "$Htmldir$link" if $link =~ m{^/}s;
1859             $url = relativize_url( $link, $Htmlfileurl );
1860 # print( "  b: [$link,$Htmlfileurl,$url]\n" );
1861         }
1862         else {
1863             $url = $link ;
1864         }
1865         return $url;
1866
1867     } else {
1868         return undef();
1869     }
1870 }
1871
1872 #
1873 # relativize_url - convert an absolute URL to one relative to a base URL.
1874 # Assumes both end in a filename.
1875 #
1876 sub relativize_url {
1877     my ($dest,$source) = @_ ;
1878
1879     my ($dest_volume,$dest_directory,$dest_file) =
1880         File::Spec::Unix->splitpath( $dest ) ;
1881     $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1882
1883     my ($source_volume,$source_directory,$source_file) =
1884         File::Spec::Unix->splitpath( $source ) ;
1885     $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1886
1887     my $rel_path = '' ;
1888     if ( $dest ne '' ) {
1889        $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1890     }
1891
1892     if ( $rel_path ne ''                &&
1893          substr( $rel_path, -1 ) ne '/' &&
1894          substr( $dest_file, 0, 1 ) ne '#'
1895         ) {
1896         $rel_path .= "/$dest_file" ;
1897     }
1898     else {
1899         $rel_path .= "$dest_file" ;
1900     }
1901
1902     return $rel_path ;
1903 }
1904
1905
1906 #
1907 # coderef - make URL from the text of a C<>
1908 #
1909 sub coderef($$){
1910     my( $page, $item ) = @_;
1911     my( $url );
1912
1913     my $fid = fragment_id( $item );
1914     if( defined( $page ) && $page ne "" ){
1915         # we have been given a $page...
1916         $page =~ s{::}{/}g;
1917
1918         # Do we take it? Item could be a section!
1919         my $base = $Items{$fid} || "";
1920         $base =~ s{[^/]*/}{};
1921         if( $base ne "$page.html" ){
1922             ###   print STDERR "coderef( $page, $item ): items{$fid} = $Items{$fid} = $base => discard page!\n";
1923             $page = undef();
1924         }
1925
1926     } else {
1927         # no page - local items precede cached items
1928         if( defined( $fid ) ){
1929             if(  exists $Local_Items{$fid} ){
1930                 $page = $Local_Items{$fid};
1931             } else {
1932                 $page = $Items{$fid};
1933             }
1934         }
1935     }
1936
1937     # if there was a pod file that we found earlier with an appropriate
1938     # =item directive, then create a link to that page.
1939     if( defined $page ){
1940         if( $page ){
1941             if( exists $Pages{$page} and $Pages{$page} =~ /([^:.]*)\.[^:]*:/){
1942                 $page = $1 . '.html';
1943             }
1944             my $link = "$Htmlroot/$page#item_" . anchorify($fid);
1945
1946             # Here, we take advantage of the knowledge that $Htmlfileurl
1947             # ne '' implies $Htmlroot eq ''.
1948             if (  $Htmlfileurl ne '' ) {
1949                 $link = "$Htmldir$link" ;
1950                 $url = relativize_url( $link, $Htmlfileurl ) ;
1951             } else {
1952                 $url = $link ;
1953             }
1954         } else {
1955             $url = "#item_" . anchorify($fid);
1956         }
1957
1958         confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1959     }
1960     return( $url, $fid );
1961 }
1962
1963
1964
1965 #
1966 # Adapted from Nick Ing-Simmons' PodToHtml package.
1967 sub relative_url {
1968     my $source_file = shift ;
1969     my $destination_file = shift;
1970
1971     my $source = URI::file->new_abs($source_file);
1972     my $uo = URI::file->new($destination_file,$source)->abs;
1973     return $uo->rel->as_string;
1974 }
1975
1976
1977 #
1978 # finish_list - finish off any pending HTML lists.  this should be called
1979 # after the entire pod file has been read and converted.
1980 #
1981 sub finish_list {
1982     while ($Listlevel > 0) {
1983         print HTML "</dl>\n";
1984         $Listlevel--;
1985     }
1986 }
1987
1988 #
1989 # htmlify - converts a pod section specification to a suitable section
1990 # specification for HTML. Note that we keep spaces and special characters
1991 # except ", ? (Netscape problem) and the hyphen (writer's problem...).
1992 #
1993 sub htmlify {
1994     my( $heading) = @_;
1995     $heading =~ s/(\s+)/ /g;
1996     $heading =~ s/\s+\Z//;
1997     $heading =~ s/\A\s+//;
1998     # The hyphen is a disgrace to the English language.
1999     $heading =~ s/[-"?]//g;
2000     $heading = lc( $heading );
2001     return $heading;
2002 }
2003
2004 #
2005 # similar to htmlify, but turns non-alphanumerics into underscores
2006 #
2007 sub anchorify {
2008     my ($anchor) = @_;
2009     $anchor = htmlify($anchor);
2010     $anchor =~ s/\W/_/g;
2011     return $anchor;
2012 }
2013
2014 #
2015 # depod - convert text by eliminating all interior sequences
2016 # Note: can be called with copy or modify semantics
2017 #
2018 my %E2c;
2019 $E2c{lt}     = '<';
2020 $E2c{gt}     = '>';
2021 $E2c{sol}    = '/';
2022 $E2c{verbar} = '|';
2023 $E2c{amp}    = '&'; # in Tk's pods
2024
2025 sub depod1($;$$);
2026
2027 sub depod($){
2028     my $string;
2029     if( ref( $_[0] ) ){
2030         $string =  ${$_[0]};
2031         ${$_[0]} = depod1( \$string );
2032     } else {
2033         $string =  $_[0];
2034         depod1( \$string );
2035     }
2036 }
2037
2038 sub depod1($;$$){
2039   my( $rstr, $func, $closing ) = @_;
2040   my $res = '';
2041   return $res unless defined $$rstr;
2042   if( ! defined( $func ) ){
2043       # skip to next begin of an interior sequence
2044       while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?//s ){
2045          # recurse into its text
2046           $res .= $1 . depod1( $rstr, $2, closing $3);
2047       }
2048       $res .= $$rstr;
2049   } elsif( $func eq 'E' ){
2050       # E<x> - convert to character
2051       $$rstr =~ s/^([^>]*)>//;
2052       $res .= $E2c{$1} || "";
2053   } elsif( $func eq 'X' ){
2054       # X<> - ignore
2055       $$rstr =~ s/^[^>]*>//;
2056   } elsif( $func eq 'Z' ){
2057       # Z<> - empty
2058       $$rstr =~ s/^>//;
2059   } else {
2060       # all others: either recurse into new function or
2061       # terminate at closing angle bracket
2062       my $term = pattern $closing;
2063       while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2064           $res .= $1;
2065           last unless $3;
2066           $res .= depod1( $rstr, $3, closing $4 );
2067       }
2068       ## If we're here and $2 ne '>': undelimited interior sequence.
2069       ## Ignored, as this is called without proper indication of where we are.
2070       ## Rely on process_text to produce diagnostics.
2071   }
2072   return $res;
2073 }
2074
2075 #
2076 # fragment_id - construct a fragment identifier from:
2077 #   a) =item text
2078 #   b) contents of C<...>
2079 #
2080 my @HC;
2081 sub fragment_id {
2082     my $text = shift();
2083     $text =~ s/\s+\Z//s;
2084     if( $text ){
2085         # a method or function?
2086         return $1 if $text =~ /(\w+)\s*\(/;
2087         return $1 if $text =~ /->\s*(\w+)\s*\(?/;
2088
2089         # a variable name?
2090         return $1 if $text =~ /^([\$\@%*]\S+)/;
2091
2092         # some pattern matching operator?
2093         return $1 if $text =~ m|^(\w+/).*/\w*$|;
2094
2095         # fancy stuff... like "do { }"
2096         return $1 if $text =~ m|^(\w+)\s*{.*}$|;
2097
2098         # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
2099         # and some funnies with ... Module ...
2100         return $1 if $text =~ m{^([a-z\d_]+)(\s+[A-Z,/& ][A-Z\d,/& ]*)?$};
2101         return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2102
2103         # text? normalize!
2104         $text =~ s/\s+/_/sg;
2105         $text =~ s{(\W)}{
2106          defined( $HC[ord($1)] ) ? $HC[ord($1)]
2107                  : ( $HC[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
2108         $text = substr( $text, 0, 50 );
2109     } else {
2110         return undef();
2111     }
2112 }
2113
2114 #
2115 # make_URL_href - generate HTML href from URL
2116 # Special treatment for CGI queries.
2117 #
2118 sub make_URL_href($){
2119     my( $url ) = @_;
2120     if( $url !~
2121         s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){
2122         $url = "<a href=\"$url\">$url</a>";
2123     }
2124     return $url;
2125 }
2126
2127 1;