lib/Pod/Html.pm

   1 package Pod::Html;
   2 use strict;
   3 require Exporter;
   4
   5 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
   6 $VERSION = 1.07;
   7 @ISA = qw(Exporter);
   8 @EXPORT = qw(pod2html htmlify);
   9 @EXPORT_OK = qw(anchorify);
  10
  11 use Carp;
  12 use Config;
  13 use Cwd;
  14 use File::Spec;
  15 use File::Spec::Unix;
  16 use Getopt::Long;
  17
  18 use locale;     # make \w work right in non-ASCII lands
  19
  20 =head1 NAME
  21
  22 Pod::Html - module to convert pod files to HTML
  23
  24 =head1 SYNOPSIS
  25
  26     use Pod::Html;
  27     pod2html([options]);
  28
  29 =head1 DESCRIPTION
  30
  31 Converts files from pod format (see L<perlpod>) to HTML format.  It
  32 can automatically generate indexes and cross-references, and it keeps
  33 a cache of things it knows how to cross-reference.
  34
  35 =head1 FUNCTIONS
  36
  37 =head2 pod2html
  38
  39     pod2html("pod2html",
  40              "--podpath=lib:ext:pod:vms",
  41              "--podroot=/usr/src/perl",
  42              "--htmlroot=/perl/nmanual",
  43              "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
  44              "--recurse",
  45              "--infile=foo.pod",
  46              "--outfile=/perl/nmanual/foo.html");
  47
  48 pod2html takes the following arguments:
  49
  50 =over 4
  51
  52 =item backlink
  53
  54     --backlink="Back to Top"
  55
  56 Adds "Back to Top" links in front of every C<head1> heading (except for
  57 the first).  By default, no backlinks are generated.
  58
  59 =item cachedir
  60
  61     --cachedir=name
  62
  63 Creates the item and directory caches in the given directory.
  64
  65 =item css
  66
  67     --css=stylesheet
  68
  69 Specify the URL of a cascading style sheet.  Also disables all HTML/CSS
  70 C<style> attributes that are output by default (to avoid conflicts).
  71
  72 =item flush
  73
  74     --flush
  75
  76 Flushes the item and directory caches.
  77
  78 =item header
  79
  80     --header
  81     --noheader
  82
  83 Creates header and footer blocks containing the text of the C<NAME>
  84 section.  By default, no headers are generated.
  85
  86 =item help
  87
  88     --help
  89
  90 Displays the usage message.
  91
  92 =item hiddendirs
  93
  94     --hiddendirs
  95     --nohiddendirs
  96
  97 Include hidden directories in the search for POD's in podpath if recurse
  98 is set.
  99 The default is not to traverse any directory whose name begins with C<.>.
 100 See L</"podpath"> and L</"recurse">.
 101
 102 [This option is for backward compatibility only.
 103 It's hard to imagine that one would usefully create a module with a
 104 name component beginning with C<.>.]
 105
 106 =item htmldir
 107
 108     --htmldir=name
 109
 110 Sets the directory in which the resulting HTML file is placed.  This
 111 is used to generate relative links to other files. Not passing this
 112 causes all links to be absolute, since this is the value that tells
 113 Pod::Html the root of the documentation tree.
 114
 115 =item htmlroot
 116
 117     --htmlroot=name
 118
 119 Sets the base URL for the HTML files.  When cross-references are made,
 120 the HTML root is prepended to the URL.
 121
 122 =item index
 123
 124     --index
 125     --noindex
 126
 127 Generate an index at the top of the HTML file.  This is the default
 128 behaviour.
 129
 130 =item infile
 131
 132     --infile=name
 133
 134 Specify the pod file to convert.  Input is taken from STDIN if no
 135 infile is specified.
 136
 137 =item libpods
 138
 139     --libpods=name:...:name
 140
 141 List of page names (eg, "perlfunc") which contain linkable C<=item>s.
 142
 143 =item netscape
 144
 145     --netscape
 146     --nonetscape
 147
 148 B<Deprecated>, has no effect. For backwards compatibility only.
 149
 150 =item outfile
 151
 152     --outfile=name
 153
 154 Specify the HTML file to create.  Output goes to STDOUT if no outfile
 155 is specified.
 156
 157 =item podpath
 158
 159     --podpath=name:...:name
 160
 161 Specify which subdirectories of the podroot contain pod files whose
 162 HTML converted forms can be linked to in cross references.
 163
 164 =item podroot
 165
 166     --podroot=name
 167
 168 Specify the base directory for finding library pods.
 169
 170 =item quiet
 171
 172     --quiet
 173     --noquiet
 174
 175 Don't display I<mostly harmless> warning messages.  These messages
 176 will be displayed by default.  But this is not the same as C<verbose>
 177 mode.
 178
 179 =item recurse
 180
 181     --recurse
 182     --norecurse
 183
 184 Recurse into subdirectories specified in podpath (default behaviour).
 185
 186 =item title
 187
 188     --title=title
 189
 190 Specify the title of the resulting HTML file.
 191
 192 =item verbose
 193
 194     --verbose
 195     --noverbose
 196
 197 Display progress messages.  By default, they won't be displayed.
 198
 199 =back
 200
 201 =head2 htmlify
 202
 203     htmlify($heading);
 204
 205 Converts a pod section specification to a suitable section specification
 206 for HTML. Note that we keep spaces and special characters except
 207 C<", ?> (Netscape problem) and the hyphen (writer's problem...).
 208
 209 =head2 anchorify
 210
 211     anchorify(@heading);
 212
 213 Similar to C<htmlify()>, but turns non-alphanumerics into underscores.  Note
 214 that C<anchorify()> is not exported by default.
 215
 216 =head1 ENVIRONMENT
 217
 218 Uses C<$Config{pod2html}> to setup default options.
 219
 220 =head1 AUTHOR
 221
 222 Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
 223
 224 =head1 SEE ALSO
 225
 226 L<perlpod>
 227
 228 =head1 COPYRIGHT
 229
 230 This program is distributed under the Artistic License.
 231
 232 =cut
 233
 234
 235 my($Cachedir);
 236 my($Dircache, $Itemcache);
 237 my @Begin_Stack;
 238 my @Libpods;
 239 my($Htmlroot, $Htmldir, $Htmlfile, $Htmlfileurl);
 240 my($Podfile, @Podpath, $Podroot);
 241 my $Css;
 242
 243 my $Recurse;
 244 my $Quiet;
 245 my $HiddenDirs;
 246 my $Verbose;
 247 my $Doindex;
 248
 249 my $Backlink;
 250 my($Listlevel, @Listend);
 251 my $After_Lpar;
 252 use vars qw($Ignore);  # need to localize it later.
 253
 254 my(%Items_Named, @Items_Seen);
 255 my($Title, $Header);
 256
 257 my $Top;
 258 my $Paragraph;
 259
 260 my %Sections;
 261
 262 # Caches
 263 my %Pages = ();                 # associative array used to find the location
 264                                 #   of pages referenced by L<> links.
 265 my %Items = ();                 # associative array used to find the location
 266                                 #   of =item directives referenced by C<> links
 267
 268 my %Local_Items;
 269 my $Is83;
 270
 271 my $Curdir = File::Spec->curdir;
 272
 273 init_globals();
 274
 275 sub init_globals {
 276     $Cachedir = ".";            # The directory to which item and directory
 277                                 # caches will be written.
 278
 279     $Dircache = "pod2htmd.tmp";
 280     $Itemcache = "pod2htmi.tmp";
 281
 282     @Begin_Stack = ();          # begin/end stack
 283
 284     @Libpods = ();              # files to search for links from C<> directives
 285     $Htmlroot = "/";            # http-server base directory from which all
 286                                 #   relative paths in $podpath stem.
 287     $Htmldir = "";              # The directory to which the html pages
 288                                 # will (eventually) be written.
 289     $Htmlfile = "";             # write to stdout by default
 290     $Htmlfileurl = "" ;         # The url that other files would use to
 291                                 # refer to this file.  This is only used
 292                                 # to make relative urls that point to
 293                                 # other files.
 294
 295     $Podfile = "";              # read from stdin by default
 296     @Podpath = ();              # list of directories containing library pods.
 297     $Podroot = $Curdir;         # filesystem base directory from which all
 298                                 #   relative paths in $podpath stem.
 299     $Css = '';                  # Cascading style sheet
 300     $Recurse = 1;               # recurse on subdirectories in $podpath.
 301     $Quiet = 0;                 # not quiet by default
 302     $Verbose = 0;               # not verbose by default
 303     $Doindex = 1;               # non-zero if we should generate an index
 304     $Backlink = '';             # text for "back to top" links
 305     $Listlevel = 0;             # current list depth
 306     @Listend = ();              # the text to use to end the list.
 307     $After_Lpar = 0;            # set to true after a par in an =item
 308     $Ignore = 1;                # whether or not to format text.  we don't
 309                                 #   format text until we hit our first pod
 310                                 #   directive.
 311
 312     @Items_Seen = ();           # for multiples of the same item in perlfunc
 313     %Items_Named = ();
 314     $Header = 0;                # produce block header/footer
 315     $Title = '';                # title to give the pod(s)
 316     $Top = 1;                   # true if we are at the top of the doc.  used
 317                                 #   to prevent the first <hr /> directive.
 318     $Paragraph = '';            # which paragraph we're processing (used
 319                                 #   for error messages)
 320     %Sections = ();             # sections within this page
 321
 322     %Local_Items = ();
 323     $Is83 = $^O eq 'dos';       # Is it an 8.3 filesystem?
 324 }
 325
 326 #
 327 # clean_data: global clean-up of pod data
 328 #
 329 sub clean_data($){
 330     my( $dataref ) = @_;
 331     for my $i ( 0..$#{$dataref} ) {
 332         ${$dataref}[$i] =~ s/\s+\Z//;
 333
 334         # have a look for all-space lines
 335       if( ${$dataref}[$i] =~ /^\s+$/m and $dataref->[$i] !~ /^\s/ ){
 336             my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
 337             splice( @$dataref, $i, 1, @chunks );
 338         }
 339     }
 340 }
 341
 342
 343 sub pod2html {
 344     local(@ARGV) = @_;
 345     local($/);
 346     local $_;
 347
 348     init_globals();
 349
 350     $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
 351
 352     # cache of %Pages and %Items from last time we ran pod2html
 353
 354     #undef $opt_help if defined $opt_help;
 355
 356     # parse the command-line parameters
 357     parse_command_line();
 358
 359     # escape the backlink argument (same goes for title but is done later...)
 360     $Backlink = html_escape($Backlink) if defined $Backlink;
 361
 362     # set some variables to their default values if necessary
 363     local *POD;
 364     unless (@ARGV && $ARGV[0]) {
 365         $Podfile  = "-" unless $Podfile;        # stdin
 366         open(POD, "<$Podfile")
 367                 || die "$0: cannot open $Podfile file for input: $!\n";
 368     } else {
 369         $Podfile = $ARGV[0];  # XXX: might be more filenames
 370         *POD = *ARGV;
 371     }
 372     $Htmlfile = "-" unless $Htmlfile;   # stdout
 373     $Htmlroot = "" if $Htmlroot eq "/"; # so we don't get a //
 374     $Htmldir =~ s#/\z## ;               # so we don't get a //
 375     if (  $Htmlroot eq ''
 376        && defined( $Htmldir )
 377        && $Htmldir ne ''
 378        && substr( $Htmlfile, 0, length( $Htmldir ) ) eq $Htmldir
 379        )
 380     {
 381         # Set the 'base' url for this file, so that we can use it
 382         # as the location from which to calculate relative links
 383         # to other files. If this is '', then absolute links will
 384         # be used throughout.
 385         $Htmlfileurl= "$Htmldir/" . substr( $Htmlfile, length( $Htmldir ) + 1);
 386     }
 387
 388     # read the pod a paragraph at a time
 389     warn "Scanning for sections in input file(s)\n" if $Verbose;
 390     $/ = "";
 391     my @poddata  = <POD>;
 392     close(POD);
 393
 394     # be eol agnostic
 395     for (@poddata) {
 396         if (/\r/) {
 397             if (/\r\n/) {
 398                 @poddata = map { s/\r\n/\n/g;
 399                                  /\n\n/ ?
 400                                      map { "$_\n\n" } split /\n\n/ :
 401                                      $_ } @poddata;
 402             } else {
 403                 @poddata = map { s/\r/\n/g;
 404                                  /\n\n/ ?
 405                                      map { "$_\n\n" } split /\n\n/ :
 406                                      $_ } @poddata;
 407             }
 408             last;
 409         }
 410     }
 411
 412     clean_data( \@poddata );
 413
 414     # scan the pod for =head[1-6] directives and build an index
 415     my $index = scan_headings(\%Sections, @poddata);
 416
 417     unless($index) {
 418         warn "No headings in $Podfile\n" if $Verbose;
 419     }
 420
 421     # open the output file
 422     open(HTML, ">$Htmlfile")
 423             || die "$0: cannot open $Htmlfile file for output: $!\n";
 424
 425     # put a title in the HTML file if one wasn't specified
 426     if ($Title eq '') {
 427         TITLE_SEARCH: {
 428             for (my $i = 0; $i < @poddata; $i++) {
 429                 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
 430                     for my $para ( @poddata[$i, $i+1] ) {
 431                         last TITLE_SEARCH
 432                             if ($Title) = $para =~ /(\S+\s+-+.*\S)/s;
 433                     }
 434                 }
 435
 436             }
 437         }
 438     }
 439     if (!$Title and $Podfile =~ /\.pod\z/) {
 440         # probably a split pod so take first =head[12] as title
 441         for (my $i = 0; $i < @poddata; $i++) {
 442             last if ($Title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
 443         }
 444         warn "adopted '$Title' as title for $Podfile\n"
 445             if $Verbose and $Title;
 446     }
 447     if ($Title) {
 448         $Title =~ s/\s*\(.*\)//;
 449     } else {
 450         warn "$0: no title for $Podfile.\n" unless $Quiet;
 451         $Podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
 452         $Title = ($Podfile eq "-" ? 'No Title' : $1);
 453         warn "using $Title" if $Verbose;
 454     }
 455     $Title = html_escape($Title);
 456
 457     my $csslink = '';
 458     my $bodystyle = ' style="background-color: white"';
 459     my $tdstyle = ' style="background-color: #cccccc"';
 460
 461     if ($Css) {
 462       $csslink = qq(\n<link rel="stylesheet" href="$Css" type="text/css" />);
 463       $csslink =~ s,\\,/,g;
 464       $csslink =~ s,(/.):,$1|,;
 465       $bodystyle = '';
 466       $tdstyle = '';
 467     }
 468
 469       my $block = $Header ? <<END_OF_BLOCK : '';
 470 <table border="0" width="100%" cellspacing="0" cellpadding="3">
 471 <tr><td class="block"$tdstyle valign="middle">
 472 <big><strong><span class="block">&nbsp;$Title</span></strong></big>
 473 </td></tr>
 474 </table>
 475 END_OF_BLOCK
 476
 477     print HTML <<END_OF_HEAD;
 478 <?xml version="1.0" ?>
 479 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 480 <html xmlns="http://www.w3.org/1999/xhtml">
 481 <head>
 482 <title>$Title</title>$csslink
 483 <meta http-equiv="content-type" content="text/html; charset=utf-8" />
 484 <link rev="made" href="mailto:$Config{perladmin}" />
 485 </head>
 486
 487 <body$bodystyle>
 488 $block
 489 END_OF_HEAD
 490
 491     # load/reload/validate/cache %Pages and %Items
 492     get_cache($Dircache, $Itemcache, \@Podpath, $Podroot, $Recurse);
 493
 494     # scan the pod for =item directives
 495     scan_items( \%Local_Items, "", @poddata);
 496
 497     # put an index at the top of the file.  note, if $Doindex is 0 we
 498     # still generate an index, but surround it with an html comment.
 499     # that way some other program can extract it if desired.
 500     $index =~ s/--+/-/g;
 501     print HTML "<p><a name=\"__index__\"></a></p>\n";
 502     print HTML "<!-- INDEX BEGIN -->\n";
 503     print HTML "<!--\n" unless $Doindex;
 504     print HTML $index;
 505     print HTML "-->\n" unless $Doindex;
 506     print HTML "<!-- INDEX END -->\n\n";
 507     print HTML "<hr />\n" if $Doindex and $index;
 508
 509     # now convert this file
 510     my $after_item;             # set to true after an =item
 511     my $need_dd = 0;
 512     warn "Converting input file $Podfile\n" if $Verbose;
 513     foreach my $i (0..$#poddata){
 514         $_ = $poddata[$i];
 515         $Paragraph = $i+1;
 516         if (/^(=.*)/s) {        # is it a pod directive?
 517             $Ignore = 0;
 518             $after_item = 0;
 519             $need_dd = 0;
 520             $_ = $1;
 521             if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
 522                 process_begin($1, $2);
 523             } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
 524                 process_end($1, $2);
 525             } elsif (/^=cut/) {                 # =cut
 526                 process_cut();
 527             } elsif (/^=pod/) {                 # =pod
 528                 process_pod();
 529             } else {
 530                 next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
 531
 532                 if (/^=(head[1-6])\s+(.*\S)/s) {        # =head[1-6] heading
 533                     process_head( $1, $2, $Doindex && $index );
 534                 } elsif (/^=item\s*(.*\S)?/sm) {        # =item text
 535                     $need_dd = process_item( $1 );
 536                     $after_item = 1;
 537                 } elsif (/^=over\s*(.*)/) {             # =over N
 538                     process_over();
 539                 } elsif (/^=back/) {            # =back
 540                     process_back($need_dd);
 541                 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
 542                     process_for($1,$2);
 543                 } else {
 544                     /^=(\S*)\s*/;
 545                     warn "$0: $Podfile: unknown pod directive '$1' in "
 546                        . "paragraph $Paragraph.  ignoring.\n" unless $Quiet;
 547                 }
 548             }
 549             $Top = 0;
 550         }
 551         else {
 552             next if $Ignore;
 553             next if @Begin_Stack && $Begin_Stack[-1] ne 'html';
 554             print HTML and next if @Begin_Stack && $Begin_Stack[-1] eq 'html';
 555             print HTML "<dd>\n" if $need_dd;
 556             my $text = $_;
 557             if( $text =~ /\A\s+/ ){
 558                 process_pre( \$text );
 559                 print HTML "<pre>\n$text</pre>\n";
 560
 561             } else {
 562                 process_text( \$text );
 563
 564                 # experimental: check for a paragraph where all lines
 565                 # have some ...\t...\t...\n pattern
 566                 if( $text =~ /\t/ ){
 567                     my @lines = split( "\n", $text );
 568                     if( @lines > 1 ){
 569                         my $all = 2;
 570                         foreach my $line ( @lines ){
 571                             if( $line =~ /\S/ && $line !~ /\t/ ){
 572                                 $all--;
 573                                 last if $all == 0;
 574                             }
 575                         }
 576                         if( $all > 0 ){
 577                             $text =~ s/\t+/<td>/g;
 578                             $text =~ s/^/<tr><td>/gm;
 579                             $text = '<table cellspacing="0" cellpadding="0">' .
 580                                     $text . '</table>';
 581                         }
 582                     }
 583                 }
 584                 ## end of experimental
 585
 586                 if( $after_item ){
 587                     $After_Lpar = 1;
 588                 }
 589                 print HTML "<p>$text</p>\n";
 590             }
 591             print HTML "</dd>\n" if $need_dd;
 592             $after_item = 0;
 593         }
 594     }
 595
 596     # finish off any pending directives
 597     finish_list();
 598
 599     # link to page index
 600     print HTML "<p><a href=\"#__index__\"><small>$Backlink</small></a></p>\n"
 601         if $Doindex and $index and $Backlink;
 602
 603     print HTML <<END_OF_TAIL;
 604 $block
 605 </body>
 606
 607 </html>
 608 END_OF_TAIL
 609
 610     # close the html file
 611     close(HTML);
 612
 613     warn "Finished\n" if $Verbose;
 614 }
 615
 616 ##############################################################################
 617
 618 sub usage {
 619     my $podfile = shift;
 620     warn "$0: $podfile: @_\n" if @_;
 621     die <<END_OF_USAGE;
 622 Usage:  $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
 623            --podpath=<name>:...:<name> --podroot=<name>
 624            --libpods=<name>:...:<name> --recurse --verbose --index
 625            --netscape --norecurse --noindex --cachedir=<name>
 626
 627   --backlink     - set text for "back to top" links (default: none).
 628   --cachedir     - directory for the item and directory cache files.
 629   --css          - stylesheet URL
 630   --flush        - flushes the item and directory caches.
 631   --[no]header   - produce block header/footer (default is no headers).
 632   --help         - prints this message.
 633   --hiddendirs   - search hidden directories in podpath
 634   --htmldir      - directory for resulting HTML files.
 635   --htmlroot     - http-server base directory from which all relative paths
 636                    in podpath stem (default is /).
 637   --[no]index    - generate an index at the top of the resulting html
 638                    (default behaviour).
 639   --infile       - filename for the pod to convert (input taken from stdin
 640                    by default).
 641   --libpods      - colon-separated list of pages to search for =item pod
 642                    directives in as targets of C<> and implicit links (empty
 643                    by default).  note, these are not filenames, but rather
 644                    page names like those that appear in L<> links.
 645   --outfile      - filename for the resulting html file (output sent to
 646                    stdout by default).
 647   --podpath      - colon-separated list of directories containing library
 648                    pods (empty by default).
 649   --podroot      - filesystem base directory from which all relative paths
 650                    in podpath stem (default is .).
 651   --[no]quiet    - suppress some benign warning messages (default is off).
 652   --[no]recurse  - recurse on those subdirectories listed in podpath
 653                    (default behaviour).
 654   --title        - title that will appear in resulting html file.
 655   --[no]verbose  - self-explanatory (off by default).
 656   --[no]netscape - deprecated, has no effect. for backwards compatibility only.
 657
 658 END_OF_USAGE
 659
 660 }
 661
 662 sub parse_command_line {
 663     my ($opt_backlink,$opt_cachedir,$opt_css,$opt_flush,$opt_header,$opt_help,
 664         $opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,
 665         $opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,
 666         $opt_recurse,$opt_title,$opt_verbose,$opt_hiddendirs);
 667
 668     unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
 669     my $result = GetOptions(
 670                             'backlink=s' => \$opt_backlink,
 671                             'cachedir=s' => \$opt_cachedir,
 672                             'css=s'      => \$opt_css,
 673                             'flush'      => \$opt_flush,
 674                             'header!'    => \$opt_header,
 675                             'help'       => \$opt_help,
 676                             'hiddendirs!'=> \$opt_hiddendirs,
 677                             'htmldir=s'  => \$opt_htmldir,
 678                             'htmlroot=s' => \$opt_htmlroot,
 679                             'index!'     => \$opt_index,
 680                             'infile=s'   => \$opt_infile,
 681                             'libpods=s'  => \$opt_libpods,
 682                             'netscape!'  => \$opt_netscape,
 683                             'outfile=s'  => \$opt_outfile,
 684                             'podpath=s'  => \$opt_podpath,
 685                             'podroot=s'  => \$opt_podroot,
 686                             'quiet!'     => \$opt_quiet,
 687                             'recurse!'   => \$opt_recurse,
 688                             'title=s'    => \$opt_title,
 689                             'verbose!'   => \$opt_verbose,
 690                            );
 691     usage("-", "invalid parameters") if not $result;
 692
 693     usage("-") if defined $opt_help;    # see if the user asked for help
 694     $opt_help = "";                     # just to make -w shut-up.
 695
 696     @Podpath  = split(":", $opt_podpath) if defined $opt_podpath;
 697     @Libpods  = split(":", $opt_libpods) if defined $opt_libpods;
 698
 699     $Backlink = $opt_backlink if defined $opt_backlink;
 700     $Cachedir = $opt_cachedir if defined $opt_cachedir;
 701     $Css      = $opt_css      if defined $opt_css;
 702     $Header   = $opt_header   if defined $opt_header;
 703     $Htmldir  = $opt_htmldir  if defined $opt_htmldir;
 704     $Htmlroot = $opt_htmlroot if defined $opt_htmlroot;
 705     $Doindex  = $opt_index    if defined $opt_index;
 706     $Podfile  = $opt_infile   if defined $opt_infile;
 707     $HiddenDirs = $opt_hiddendirs if defined $opt_hiddendirs;
 708     $Htmlfile = $opt_outfile  if defined $opt_outfile;
 709     $Podroot  = $opt_podroot  if defined $opt_podroot;
 710     $Quiet    = $opt_quiet    if defined $opt_quiet;
 711     $Recurse  = $opt_recurse  if defined $opt_recurse;
 712     $Title    = $opt_title    if defined $opt_title;
 713     $Verbose  = $opt_verbose  if defined $opt_verbose;
 714
 715     warn "Flushing item and directory caches\n"
 716         if $opt_verbose && defined $opt_flush;
 717     $Dircache = "$Cachedir/pod2htmd.tmp";
 718     $Itemcache = "$Cachedir/pod2htmi.tmp";
 719     if (defined $opt_flush) {
 720         1 while unlink($Dircache, $Itemcache);
 721     }
 722 }
 723
 724
 725 my $Saved_Cache_Key;
 726
 727 sub get_cache {
 728     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 729     my @cache_key_args = @_;
 730
 731     # A first-level cache:
 732     # Don't bother reading the cache files if they still apply
 733     # and haven't changed since we last read them.
 734
 735     my $this_cache_key = cache_key(@cache_key_args);
 736
 737     return if $Saved_Cache_Key and $this_cache_key eq $Saved_Cache_Key;
 738
 739     # load the cache of %Pages and %Items if possible.  $tests will be
 740     # non-zero if successful.
 741     my $tests = 0;
 742     if (-f $dircache && -f $itemcache) {
 743         warn "scanning for item cache\n" if $Verbose;
 744         $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
 745     }
 746
 747     # if we didn't succeed in loading the cache then we must (re)build
 748     #  %Pages and %Items.
 749     if (!$tests) {
 750         warn "scanning directories in pod-path\n" if $Verbose;
 751         scan_podpath($podroot, $recurse, 0);
 752     }
 753     $Saved_Cache_Key = cache_key(@cache_key_args);
 754 }
 755
 756 sub cache_key {
 757     my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
 758     return join('!', $dircache, $itemcache, $recurse,
 759         @$podpath, $podroot, stat($dircache), stat($itemcache));
 760 }
 761
 762 #
 763 # load_cache - tries to find if the caches stored in $dircache and $itemcache
 764 #  are valid caches of %Pages and %Items.  if they are valid then it loads
 765 #  them and returns a non-zero value.
 766 #
 767 sub load_cache {
 768     my($dircache, $itemcache, $podpath, $podroot) = @_;
 769     my($tests);
 770     local $_;
 771
 772     $tests = 0;
 773
 774     open(CACHE, "<$itemcache") ||
 775         die "$0: error opening $itemcache for reading: $!\n";
 776     $/ = "\n";
 777
 778     # is it the same podpath?
 779     $_ = <CACHE>;
 780     chomp($_);
 781     $tests++ if (join(":", @$podpath) eq $_);
 782
 783     # is it the same podroot?
 784     $_ = <CACHE>;
 785     chomp($_);
 786     $tests++ if ($podroot eq $_);
 787
 788     # load the cache if its good
 789     if ($tests != 2) {
 790         close(CACHE);
 791         return 0;
 792     }
 793
 794     warn "loading item cache\n" if $Verbose;
 795     while (<CACHE>) {
 796         /(.*?) (.*)$/;
 797         $Items{$1} = $2;
 798     }
 799     close(CACHE);
 800
 801     warn "scanning for directory cache\n" if $Verbose;
 802     open(CACHE, "<$dircache") ||
 803         die "$0: error opening $dircache for reading: $!\n";
 804     $/ = "\n";
 805     $tests = 0;
 806
 807     # is it the same podpath?
 808     $_ = <CACHE>;
 809     chomp($_);
 810     $tests++ if (join(":", @$podpath) eq $_);
 811
 812     # is it the same podroot?
 813     $_ = <CACHE>;
 814     chomp($_);
 815     $tests++ if ($podroot eq $_);
 816
 817     # load the cache if its good
 818     if ($tests != 2) {
 819         close(CACHE);
 820         return 0;
 821     }
 822
 823     warn "loading directory cache\n" if $Verbose;
 824     while (<CACHE>) {
 825         /(.*?) (.*)$/;
 826         $Pages{$1} = $2;
 827     }
 828
 829     close(CACHE);
 830
 831     return 1;
 832 }
 833
 834 #
 835 # scan_podpath - scans the directories specified in @podpath for directories,
 836 #  .pod files, and .pm files.  it also scans the pod files specified in
 837 #  @Libpods for =item directives.
 838 #
 839 sub scan_podpath {
 840     my($podroot, $recurse, $append) = @_;
 841     my($pwd, $dir);
 842     my($libpod, $dirname, $pod, @files, @poddata);
 843
 844     unless($append) {
 845         %Items = ();
 846         %Pages = ();
 847     }
 848
 849     # scan each directory listed in @Podpath
 850     $pwd = getcwd();
 851     chdir($podroot)
 852         || die "$0: error changing to directory $podroot: $!\n";
 853     foreach $dir (@Podpath) {
 854         scan_dir($dir, $recurse);
 855     }
 856
 857     # scan the pods listed in @Libpods for =item directives
 858     foreach $libpod (@Libpods) {
 859         # if the page isn't defined then we won't know where to find it
 860         # on the system.
 861         next unless defined $Pages{$libpod} && $Pages{$libpod};
 862
 863         # if there is a directory then use the .pod and .pm files within it.
 864         # NOTE: Only finds the first so-named directory in the tree.
 865 #       if ($Pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
 866         if ($Pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
 867             #  find all the .pod and .pm files within the directory
 868             $dirname = $1;
 869             opendir(DIR, $dirname) ||
 870                 die "$0: error opening directory $dirname: $!\n";
 871             @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
 872             closedir(DIR);
 873
 874             # scan each .pod and .pm file for =item directives
 875             foreach $pod (@files) {
 876                 open(POD, "<$dirname/$pod") ||
 877                     die "$0: error opening $dirname/$pod for input: $!\n";
 878                 @poddata = <POD>;
 879                 close(POD);
 880                 clean_data( \@poddata );
 881
 882                 scan_items( \%Items, "$dirname/$pod", @poddata);
 883             }
 884
 885             # use the names of files as =item directives too.
 886 ### Don't think this should be done this way - confuses issues.(WL)
 887 ###         foreach $pod (@files) {
 888 ###             $pod =~ /^(.*)(\.pod|\.pm)$/;
 889 ###             $Items{$1} = "$dirname/$1.html" if $1;
 890 ###         }
 891         } elsif ($Pages{$libpod} =~ /([^:]*\.pod):/ ||
 892                  $Pages{$libpod} =~ /([^:]*\.pm):/) {
 893             # scan the .pod or .pm file for =item directives
 894             $pod = $1;
 895             open(POD, "<$pod") ||
 896                 die "$0: error opening $pod for input: $!\n";
 897             @poddata = <POD>;
 898             close(POD);
 899             clean_data( \@poddata );
 900
 901             scan_items( \%Items, "$pod", @poddata);
 902         } else {
 903             warn "$0: shouldn't be here (line ".__LINE__."\n" unless $Quiet;
 904         }
 905     }
 906     @poddata = ();      # clean-up a bit
 907
 908     chdir($pwd)
 909         || die "$0: error changing to directory $pwd: $!\n";
 910
 911     # cache the item list for later use
 912     warn "caching items for later use\n" if $Verbose;
 913     open(CACHE, ">$Itemcache") ||
 914         die "$0: error open $Itemcache for writing: $!\n";
 915
 916     print CACHE join(":", @Podpath) . "\n$podroot\n";
 917     foreach my $key (keys %Items) {
 918         print CACHE "$key $Items{$key}\n";
 919     }
 920
 921     close(CACHE);
 922
 923     # cache the directory list for later use
 924     warn "caching directories for later use\n" if $Verbose;
 925     open(CACHE, ">$Dircache") ||
 926         die "$0: error open $Dircache for writing: $!\n";
 927
 928     print CACHE join(":", @Podpath) . "\n$podroot\n";
 929     foreach my $key (keys %Pages) {
 930         print CACHE "$key $Pages{$key}\n";
 931     }
 932
 933     close(CACHE);
 934 }
 935
 936 #
 937 # scan_dir - scans the directory specified in $dir for subdirectories, .pod
 938 #  files, and .pm files.  notes those that it finds.  this information will
 939 #  be used later in order to figure out where the pages specified in L<>
 940 #  links are on the filesystem.
 941 #
 942 sub scan_dir {
 943     my($dir, $recurse) = @_;
 944     my($t, @subdirs, @pods, $pod, $dirname, @dirs);
 945     local $_;
 946
 947     @subdirs = ();
 948     @pods = ();
 949
 950     opendir(DIR, $dir) ||
 951         die "$0: error opening directory $dir: $!\n";
 952     while (defined($_ = readdir(DIR))) {
 953         if (-d "$dir/$_" && $_ ne "." && $_ ne ".."
 954             && ($HiddenDirs || !/^\./)
 955         ) {         # directory
 956             $Pages{$_}  = "" unless defined $Pages{$_};
 957             $Pages{$_} .= "$dir/$_:";
 958             push(@subdirs, $_);
 959         } elsif (/\.pod\z/) {                               # .pod
 960             s/\.pod\z//;
 961             $Pages{$_}  = "" unless defined $Pages{$_};
 962             $Pages{$_} .= "$dir/$_.pod:";
 963             push(@pods, "$dir/$_.pod");
 964         } elsif (/\.html\z/) {                              # .html
 965             s/\.html\z//;
 966             $Pages{$_}  = "" unless defined $Pages{$_};
 967             $Pages{$_} .= "$dir/$_.pod:";
 968         } elsif (/\.pm\z/) {                                # .pm
 969             s/\.pm\z//;
 970             $Pages{$_}  = "" unless defined $Pages{$_};
 971             $Pages{$_} .= "$dir/$_.pm:";
 972             push(@pods, "$dir/$_.pm");
 973         } elsif (-T "$dir/$_") {                            # script(?)
 974             local *F;
 975             if (open(F, "$dir/$_")) {
 976                 my $line;
 977                 while (defined($line = <F>)) {
 978                     if ($line =~ /^=(?:pod|head1)/) {
 979                         $Pages{$_}  = "" unless defined $Pages{$_};
 980                         $Pages{$_} .= "$dir/$_.pod:";
 981                         last;
 982                     }
 983                 }
 984                 close(F);
 985             }
 986         }
 987     }
 988     closedir(DIR);
 989
 990     # recurse on the subdirectories if necessary
 991     if ($recurse) {
 992         foreach my $subdir (@subdirs) {
 993             scan_dir("$dir/$subdir", $recurse);
 994         }
 995     }
 996 }
 997
 998 #
 999 # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
1000 #  build an index.
1001 #
1002 sub scan_headings {
1003     my($sections, @data) = @_;
1004     my($tag, $which_head, $otitle, $listdepth, $index);
1005
1006     local $Ignore = 0;
1007
1008     $listdepth = 0;
1009     $index = "";
1010
1011     # scan for =head directives, note their name, and build an index
1012     #  pointing to each of them.
1013     foreach my $line (@data) {
1014       if ($line =~ /^=(head)([1-6])\s+(.*)/) {
1015         ($tag, $which_head, $otitle) = ($1,$2,$3);
1016
1017         my $title = depod( $otitle );
1018         my $name = anchorify( $title );
1019         $$sections{$name} = 1;
1020         $title = process_text( \$otitle );
1021
1022             while ($which_head != $listdepth) {
1023                 if ($which_head > $listdepth) {
1024                     $index .= "\n" . ("\t" x $listdepth) . "<ul>\n";
1025                     $listdepth++;
1026                 } elsif ($which_head < $listdepth) {
1027                     $listdepth--;
1028                     $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1029                 }
1030             }
1031
1032             $index .= "\n" . ("\t" x $listdepth) . "<li>" .
1033                       "<a href=\"#" . $name . "\">" .
1034                       $title . "</a></li>";
1035         }
1036     }
1037
1038     # finish off the lists
1039     while ($listdepth--) {
1040         $index .= "\n" . ("\t" x $listdepth) . "</ul>\n";
1041     }
1042
1043     # get rid of bogus lists
1044     $index =~ s,\t*<ul>\s*</ul>\n,,g;
1045
1046     return $index;
1047 }
1048
1049 #
1050 # scan_items - scans the pod specified by $pod for =item directives.  we
1051 #  will use this information later on in resolving C<> links.
1052 #
1053 sub scan_items {
1054     my( $itemref, $pod, @poddata ) = @_;
1055     my($i, $item);
1056     local $_;
1057
1058     $pod =~ s/\.pod\z//;
1059     $pod .= ".html" if $pod;
1060
1061     foreach $i (0..$#poddata) {
1062         my $txt = depod( $poddata[$i] );
1063
1064         # figure out what kind of item it is.
1065         # Build string for referencing this item.
1066         if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
1067             next unless $1;
1068             $item = $1;
1069         } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
1070             $item = $1;
1071         } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
1072             $item = $1;
1073         } else {
1074             next;
1075         }
1076         my $fid = fragment_id( $item );
1077         $$itemref{$fid} = "$pod" if $fid;
1078     }
1079 }
1080
1081 #
1082 # process_head - convert a pod head[1-6] tag and convert it to HTML format.
1083 #
1084 sub process_head {
1085     my($tag, $heading, $hasindex) = @_;
1086
1087     # figure out the level of the =head
1088     $tag =~ /head([1-6])/;
1089     my $level = $1;
1090
1091     if( $Listlevel ){
1092         warn "$0: $Podfile: unterminated list at =head in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1093         while( $Listlevel ){
1094             process_back();
1095         }
1096     }
1097
1098     print HTML "<p>\n";
1099     if( $level == 1 && ! $Top ){
1100       print HTML "<a href=\"#__index__\"><small>$Backlink</small></a>\n"
1101         if $hasindex and $Backlink;
1102       print HTML "</p>\n<hr />\n"
1103     } else {
1104       print HTML "</p>\n";
1105     }
1106
1107     my $name = anchorify( depod( $heading ) );
1108     my $convert = process_text( \$heading );
1109     print HTML "<h$level><a name=\"$name\">$convert</a></h$level>\n";
1110 }
1111
1112
1113 #
1114 # emit_item_tag - print an =item's text
1115 # Note: The global $EmittedItem is used for inhibiting self-references.
1116 #
1117 my $EmittedItem;
1118
1119 sub emit_item_tag($$$){
1120     my( $otext, $text, $compact ) = @_;
1121     my $item = fragment_id( $text );
1122
1123     $EmittedItem = $item;
1124     ### print STDERR "emit_item_tag=$item ($text)\n";
1125
1126     print HTML '<strong>';
1127     if ($Items_Named{$item}++) {
1128         print HTML process_text( \$otext );
1129     } else {
1130         my $name = 'item_' . $item;
1131         $name = anchorify($name);
1132         print HTML qq{<a name="$name">}, process_text( \$otext ), '</a>';
1133     }
1134     print HTML "</strong>\n";
1135     undef( $EmittedItem );
1136 }
1137
1138 sub emit_li {
1139     my( $tag ) = @_;
1140     if( $Items_Seen[$Listlevel]++ == 0 ){
1141         push( @Listend, "</$tag>" );
1142         print HTML "<$tag>\n";
1143     }
1144     my $emitted = $tag eq 'dl' ? 'dt' : 'li';
1145     print HTML "<$emitted>";
1146     return $emitted;
1147 }
1148
1149 #
1150 # process_item - convert a pod item tag and convert it to HTML format.
1151 #
1152 sub process_item {
1153     my( $otext ) = @_;
1154     my $need_dd = 0; # set to 1 if we need a <dd></dd> after an item
1155
1156     # lots of documents start a list without doing an =over.  this is
1157     # bad!  but, the proper thing to do seems to be to just assume
1158     # they did do an =over.  so warn them once and then continue.
1159     if( $Listlevel == 0 ){
1160         warn "$0: $Podfile: unexpected =item directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1161         process_over();
1162     }
1163
1164     # formatting: insert a paragraph if preceding item has >1 paragraph
1165     if( $After_Lpar ){
1166         print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar;
1167         $After_Lpar = 0;
1168     }
1169
1170     # remove formatting instructions from the text
1171     my $text = depod( $otext );
1172
1173     my $emitted; # the tag actually emitted, used for closing
1174
1175     # all the list variants:
1176     if( $text =~ /\A\*/ ){ # bullet
1177         $emitted = emit_li( 'ul' );
1178         if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1179             my $tag = $1;
1180             $otext =~ s/\A\*\s+//;
1181             emit_item_tag( $otext, $tag, 1 );
1182         }
1183
1184     } elsif( $text =~ /\A\d+/ ){ # numbered list
1185         $emitted = emit_li( 'ol' );
1186         if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1187             my $tag = $1;
1188             $otext =~ s/\A\d+\.?\s*//;
1189             emit_item_tag( $otext, $tag, 1 );
1190         }
1191
1192     } else {                    # definition list
1193         $emitted = emit_li( 'dl' );
1194         if ($text =~ /\A(.+)\Z/s ){ # should have text
1195             emit_item_tag( $otext, $text, 1 );
1196         }
1197         $need_dd = 1;
1198     }
1199     print HTML "\n";
1200     return $need_dd;
1201 }
1202
1203 #
1204 # process_over - process a pod over tag and start a corresponding HTML list.
1205 #
1206 sub process_over {
1207     # start a new list
1208     $Listlevel++;
1209     push( @Items_Seen, 0 );
1210     $After_Lpar = 0;
1211 }
1212
1213 #
1214 # process_back - process a pod back tag and convert it to HTML format.
1215 #
1216 sub process_back {
1217     my $need_dd = shift;
1218     if( $Listlevel == 0 ){
1219         warn "$0: $Podfile: unexpected =back directive in paragraph $Paragraph.  ignoring.\n" unless $Quiet;
1220         return;
1221     }
1222
1223     # close off the list.  note, I check to see if $Listend[$Listlevel] is
1224     # defined because an =item directive may have never appeared and thus
1225     # $Listend[$Listlevel] may have never been initialized.
1226     $Listlevel--;
1227     if( defined $Listend[$Listlevel] ){
1228         print HTML $need_dd ? "</dd>\n" : "</li>\n" if $After_Lpar;
1229         print HTML $Listend[$Listlevel];
1230         print HTML "\n";
1231         pop( @Listend );
1232     }
1233     $After_Lpar = 0;
1234
1235     # clean up item count
1236     pop( @Items_Seen );
1237 }
1238
1239 #
1240 # process_cut - process a pod cut tag, thus start ignoring pod directives.
1241 #
1242 sub process_cut {
1243     $Ignore = 1;
1244 }
1245
1246 #
1247 # process_pod - process a pod tag, thus stop ignoring pod directives
1248 # until we see a corresponding cut.
1249 #
1250 sub process_pod {
1251     # no need to set $Ignore to 0 cause the main loop did it
1252 }
1253
1254 #
1255 # process_for - process a =for pod tag.  if it's for html, spit
1256 # it out verbatim, if illustration, center it, otherwise ignore it.
1257 #
1258 sub process_for {
1259     my($whom, $text) = @_;
1260     if ( $whom =~ /^(pod2)?html$/i) {
1261         print HTML $text;
1262     } elsif ($whom =~ /^illustration$/i) {
1263         1 while chomp $text;
1264         for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1265           $text .= $ext, last if -r "$text$ext";
1266         }
1267         print HTML qq{<p align="center"><img src="$text" alt="$text illustration" /></p>};
1268     }
1269 }
1270
1271 #
1272 # process_begin - process a =begin pod tag.  this pushes
1273 # whom we're beginning on the begin stack.  if there's a
1274 # begin stack, we only print if it us.
1275 #
1276 sub process_begin {
1277     my($whom, $text) = @_;
1278     $whom = lc($whom);
1279     push (@Begin_Stack, $whom);
1280     if ( $whom =~ /^(pod2)?html$/) {
1281         print HTML $text if $text;
1282     }
1283 }
1284
1285 #
1286 # process_end - process a =end pod tag.  pop the
1287 # begin stack.  die if we're mismatched.
1288 #
1289 sub process_end {
1290     my($whom, $text) = @_;
1291     $whom = lc($whom);
1292     if ($Begin_Stack[-1] ne $whom ) {
1293         die "Unmatched begin/end at chunk $Paragraph\n"
1294     }
1295     pop( @Begin_Stack );
1296 }
1297
1298 #
1299 # process_pre - indented paragraph, made into <pre></pre>
1300 #
1301 sub process_pre {
1302     my( $text ) = @_;
1303     my( $rest );
1304     return if $Ignore;
1305
1306     $rest = $$text;
1307
1308     # insert spaces in place of tabs
1309     $rest =~ s#(.+)#
1310             my $line = $1;
1311             1 while $line =~ s/(\t+)/' ' x ((length($1) * 8) - $-[0] % 8)/e;
1312             $line;
1313         #eg;
1314
1315     # convert some special chars to HTML escapes
1316     $rest = html_escape($rest);
1317
1318     # try and create links for all occurrences of perl.* within
1319     # the preformatted text.
1320     $rest =~ s{
1321                  (\s*)(perl\w+)
1322               }{
1323                  if ( defined $Pages{$2} ){     # is a link
1324                      qq($1<a href="$Htmlroot/$Pages{$2}">$2</a>);
1325                  } elsif (defined $Pages{dosify($2)}) { # is a link
1326                      qq($1<a href="$Htmlroot/$Pages{dosify($2)}">$2</a>);
1327                  } else {
1328                      "$1$2";
1329                  }
1330               }xeg;
1331      $rest =~ s{
1332                  (<a\ href="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1333                }{
1334                   my $url ;
1335                   if ( $Htmlfileurl ne '' ){
1336                      # Here, we take advantage of the knowledge
1337                      # that $Htmlfileurl ne '' implies $Htmlroot eq ''.
1338                      # Since $Htmlroot eq '', we need to prepend $Htmldir
1339                      # on the fron of the link to get the absolute path
1340                      # of the link's target. We check for a leading '/'
1341                      # to avoid corrupting links that are #, file:, etc.
1342                      my $old_url = $3 ;
1343                      $old_url = "$Htmldir$old_url" if $old_url =~ m{^\/};
1344                      $url = relativize_url( "$old_url.html", $Htmlfileurl );
1345                   } else {
1346                      $url = "$3.html" ;
1347                   }
1348                   "$1$url" ;
1349                }xeg;
1350
1351     # Look for embedded URLs and make them into links.  We don't
1352     # relativize them since they are best left as the author intended.
1353
1354     my $urls = '(' . join ('|', qw{
1355                 http
1356                 telnet
1357                 mailto
1358                 news
1359                 gopher
1360                 file
1361                 wais
1362                 ftp
1363             } )
1364         . ')';
1365
1366     my $ltrs = '\w';
1367     my $gunk = '/#~:.?+=&%@!\-';
1368     my $punc = '.:!?\-;';
1369     my $any  = "${ltrs}${gunk}${punc}";
1370
1371     $rest =~ s{
1372         \b                      # start at word boundary
1373         (                       # begin $1  {
1374             $urls :             # need resource and a colon
1375             (?!:)               # Ignore File::, among others.
1376             [$any] +?           # followed by one or more of any valid
1377                                 #   character, but be conservative and
1378                                 #   take only what you need to....
1379         )                       # end   $1  }
1380         (?=
1381             &quot; &gt;         # maybe pre-quoted '<a href="...">'
1382         |                       # or:
1383             [$punc]*            # 0 or more punctuation
1384             (?:                 #   followed
1385                 [^$any]         #   by a non-url char
1386             |                   #   or
1387                 $               #   end of the string
1388             )                   #
1389         |                       # or else
1390             $                   #   then end of the string
1391         )
1392       }{<a href="$1">$1</a>}igox;
1393
1394     # text should be as it is (verbatim)
1395     $$text = $rest;
1396 }
1397
1398
1399 #
1400 # pure text processing
1401 #
1402 # pure_text/inIS_text: differ with respect to automatic C<> recognition.
1403 # we don't want this to happen within IS
1404 #
1405 sub pure_text($){
1406     my $text = shift();
1407     process_puretext( $text, 1 );
1408 }
1409
1410 sub inIS_text($){
1411     my $text = shift();
1412     process_puretext( $text, 0 );
1413 }
1414
1415 #
1416 # process_puretext - process pure text (without pod-escapes) converting
1417 #  double-quotes and handling implicit C<> links.
1418 #
1419 sub process_puretext {
1420     my($text, $notinIS) = @_;
1421
1422     ## Guessing at func() or [\$\@%&]*var references in plain text is destined
1423     ## to produce some strange looking ref's. uncomment to disable:
1424     ## $notinIS = 0;
1425
1426     my(@words, $lead, $trail);
1427
1428     # keep track of leading and trailing white-space
1429     $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");
1430     $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
1431
1432     # split at space/non-space boundaries
1433     @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
1434
1435     # process each word individually
1436     foreach my $word (@words) {
1437         # skip space runs
1438         next if $word =~ /^\s*$/;
1439         # see if we can infer a link
1440         if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {
1441             # has parenthesis so should have been a C<> ref
1442             ## try for a pagename (perlXXX(1))?
1443             my( $func, $args ) = ( $1, $2 );
1444             if( $args =~ /^\d+$/ ){
1445                 my $url = page_sect( $word, '' );
1446                 if( defined $url ){
1447                     $word = "<a href=\"$url\">the $word manpage</a>";
1448                     next;
1449                 }
1450             }
1451             ## try function name for a link, append tt'ed argument list
1452             $word = emit_C( $func, '', "($args)");
1453
1454 #### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1455 ##      } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1456 ##          # perl variables, should be a C<> ref
1457 ##          $word = emit_C( $word );
1458
1459         } elsif ($word =~ m,^\w+://\w,) {
1460             # looks like a URL
1461             # Don't relativize it: leave it as the author intended
1462             $word = qq(<a href="$word">$word</a>);
1463         } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
1464             # looks like an e-mail address
1465             my ($w1, $w2, $w3) = ("", $word, "");
1466             ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1467             ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1468             $word = qq($w1<a href="mailto:$w2">$w2</a>$w3);
1469         } else {
1470             $word = html_escape($word) if $word =~ /["&<>]/;
1471         }
1472     }
1473
1474     # put everything back together
1475     return $lead . join( '', @words ) . $trail;
1476 }
1477
1478
1479 #
1480 # process_text - handles plaintext that appears in the input pod file.
1481 # there may be pod commands embedded within the text so those must be
1482 # converted to html commands.
1483 #
1484
1485 sub process_text1($$;$$);
1486 sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
1487 sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
1488
1489 sub process_text {
1490     return if $Ignore;
1491     my( $tref ) = @_;
1492     my $res = process_text1( 0, $tref );
1493     $res =~ s/\s+$//s;
1494     $$tref = $res;
1495 }
1496
1497 sub process_text1($$;$$){
1498     my( $lev, $rstr, $func, $closing ) = @_;
1499     my $res = '';
1500
1501     unless (defined $func) {
1502         $func = '';
1503         $lev++;
1504     }
1505
1506     if( $func eq 'B' ){
1507         # B<text> - boldface
1508         $res = '<strong>' . process_text1( $lev, $rstr ) . '</strong>';
1509
1510     } elsif( $func eq 'C' ){
1511         # C<code> - can be a ref or <code></code>
1512         # need to extract text
1513         my $par = go_ahead( $rstr, 'C', $closing );
1514
1515         ## clean-up of the link target
1516         my $text = depod( $par );
1517
1518         ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1519         ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1520
1521         $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1522
1523     } elsif( $func eq 'E' ){
1524         # E<x> - convert to character
1525         $$rstr =~ s/^([^>]*)>//;
1526         my $escape = $1;
1527         $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1528         $res = "&$escape;";
1529
1530     } elsif( $func eq 'F' ){
1531         # F<filename> - italizice
1532         $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1533
1534     } elsif( $func eq 'I' ){
1535         # I<text> - italizice
1536         $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1537
1538     } elsif( $func eq 'L' ){
1539         # L<link> - link
1540         ## L<text|cross-ref> => produce text, use cross-ref for linking
1541         ## L<cross-ref> => make text from cross-ref
1542         ## need to extract text
1543         my $par = go_ahead( $rstr, 'L', $closing );
1544
1545         # some L<>'s that shouldn't be:
1546         # a) full-blown URL's are emitted as-is
1547         if( $par =~ m{^\w+://}s ){
1548             return make_URL_href( $par );
1549         }
1550         # b) C<...> is stripped and treated as C<>
1551         if( $par =~ /^C<(.*)>$/ ){
1552             my $text = depod( $1 );
1553             return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1554         }
1555
1556         # analyze the contents
1557         $par =~ s/\n/ /g;   # undo word-wrapped tags
1558         my $opar = $par;
1559         my $linktext;
1560         if( $par =~ s{^([^|]+)\|}{} ){
1561             $linktext = $1;
1562         }
1563
1564         # make sure sections start with a /
1565         $par =~ s{^"}{/"};
1566
1567         my( $page, $section, $ident );
1568
1569         # check for link patterns
1570         if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){     # name/ident
1571             # we've got a name/ident (no quotes)
1572             ( $page, $ident ) = ( $1, $2 );
1573             ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1574
1575         } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1576             # even though this should be a "section", we go for ident first
1577             ( $page, $ident ) = ( $1, $2 );
1578             ### print STDERR "--> L<$par> to page $page, section $section\n";
1579
1580         } elsif( $par =~ /\s/ ){  # this must be a section with missing quotes
1581             ( $page, $section ) = ( '', $par );
1582             ### print STDERR "--> L<$par> to void page, section $section\n";
1583
1584         } else {
1585             ( $page, $section ) = ( $par, '' );
1586             ### print STDERR "--> L<$par> to page $par, void section\n";
1587         }
1588
1589         # now, either $section or $ident is defined. the convoluted logic
1590         # below tries to resolve L<> according to what the user specified.
1591         # failing this, we try to find the next best thing...
1592         my( $url, $ltext, $fid );
1593
1594         RESOLVE: {
1595             if( defined $ident ){
1596                 ## try to resolve $ident as an item
1597                 ( $url, $fid ) = coderef( $page, $ident );
1598                 if( $url ){
1599                     if( ! defined( $linktext ) ){
1600                         $linktext = $ident;
1601                         $linktext .= " in " if $ident && $page;
1602                         $linktext .= "the $page manpage" if $page;
1603                     }
1604                     ###  print STDERR "got coderef url=$url\n";
1605                     last RESOLVE;
1606                 }
1607                 ## no luck: go for a section (auto-quoting!)
1608                 $section = $ident;
1609             }
1610             ## now go for a section
1611             my $htmlsection = htmlify( $section );
1612             $url = page_sect( $page, $htmlsection );
1613             if( $url ){
1614                 if( ! defined( $linktext ) ){
1615                     $linktext = $section;
1616                     $linktext .= " in " if $section && $page;
1617                     $linktext .= "the $page manpage" if $page;
1618                 }
1619                 ### print STDERR "got page/section url=$url\n";
1620                 last RESOLVE;
1621             }
1622             ## no luck: go for an ident
1623             if( $section ){
1624                 $ident = $section;
1625             } else {
1626                 $ident = $page;
1627                 $page  = undef();
1628             }
1629             ( $url, $fid ) = coderef( $page, $ident );
1630             if( $url ){
1631                 if( ! defined( $linktext ) ){
1632                     $linktext = $ident;
1633                     $linktext .= " in " if $ident && $page;
1634                     $linktext .= "the $page manpage" if $page;
1635                 }
1636                 ### print STDERR "got section=>coderef url=$url\n";
1637                 last RESOLVE;
1638             }
1639
1640             # warning; show some text.
1641             $linktext = $opar unless defined $linktext;
1642             warn "$0: $Podfile: cannot resolve L<$opar> in paragraph $Paragraph.\n" unless $Quiet;
1643         }
1644
1645         # now we have a URL or just plain code
1646         $$rstr = $linktext . '>' . $$rstr;
1647         if( defined( $url ) ){
1648             $res = "<a href=\"$url\">" . process_text1( $lev, $rstr ) . '</a>';
1649         } else {
1650             $res = '<em>' . process_text1( $lev, $rstr ) . '</em>';
1651         }
1652
1653     } elsif( $func eq 'S' ){
1654         # S<text> - non-breaking spaces
1655         $res = process_text1( $lev, $rstr );
1656         $res =~ s/ /&nbsp;/g;
1657
1658     } elsif( $func eq 'X' ){
1659         # X<> - ignore
1660         $$rstr =~ s/^[^>]*>//;
1661
1662     } elsif( $func eq 'Z' ){
1663         # Z<> - empty
1664         warn "$0: $Podfile: invalid X<> in paragraph $Paragraph.\n"
1665             unless $$rstr =~ s/^>// or $Quiet;
1666
1667     } else {
1668         my $term = pattern $closing;
1669         while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
1670             # all others: either recurse into new function or
1671             # terminate at closing angle bracket(s)
1672             my $pt = $1;
1673             $pt .= $2 if !$3 &&  $lev == 1;
1674             $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1675             return $res if !$3 && $lev > 1;
1676             if( $3 ){
1677                 $res .= process_text1( $lev, $rstr, $3, closing $4 );
1678             }
1679         }
1680         if( $lev == 1 ){
1681             $res .= pure_text( $$rstr );
1682         } else {
1683             warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph.\n" unless $Quiet;
1684         }
1685     }
1686     return $res;
1687 }
1688
1689 #
1690 # go_ahead: extract text of an IS (can be nested)
1691 #
1692 sub go_ahead($$$){
1693     my( $rstr, $func, $closing ) = @_;
1694     my $res = '';
1695     my @closing = ($closing);
1696     while( $$rstr =~
1697       s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
1698         $res .= $1;
1699         unless( $3 ){
1700             shift @closing;
1701             return $res unless @closing;
1702         } else {
1703             unshift @closing, closing $4;
1704         }
1705         $res .= $2;
1706     }
1707     warn "$0: $Podfile: undelimited $func<> in paragraph $Paragraph.\n" unless $Quiet;
1708     return $res;
1709 }
1710
1711 #
1712 # emit_C - output result of C<text>
1713 #    $text is the depod-ed text
1714 #
1715 sub emit_C($;$$){
1716     my( $text, $nocode, $args ) = @_;
1717     $args = '' unless defined $args;
1718     my $res;
1719     my( $url, $fid ) = coderef( undef(), $text );
1720
1721     # need HTML-safe text
1722     my $linktext = html_escape( "$text$args" );
1723
1724     if( defined( $url ) &&
1725         (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1726         $res = "<a href=\"$url\"><code>$linktext</code></a>";
1727     } elsif( 0 && $nocode ){
1728         $res = $linktext;
1729     } else {
1730         $res = "<code>$linktext</code>";
1731     }
1732     return $res;
1733 }
1734
1735 #
1736 # html_escape: make text safe for HTML
1737 #
1738 sub html_escape {
1739     my $rest = $_[0];
1740     $rest   =~ s/&/&amp;/g;
1741     $rest   =~ s/</&lt;/g;
1742     $rest   =~ s/>/&gt;/g;
1743     $rest   =~ s/"/&quot;/g;
1744     # &apos; is only in XHTML, not HTML4.  Be conservative
1745     #$rest   =~ s/'/&apos;/g;
1746     return $rest;
1747 }
1748
1749
1750 #
1751 # dosify - convert filenames to 8.3
1752 #
1753 sub dosify {
1754     my($str) = @_;
1755     return lc($str) if $^O eq 'VMS';     # VMS just needs casing
1756     if ($Is83) {
1757         $str = lc $str;
1758         $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1759         $str =~ s/(\w+)/substr ($1,0,8)/ge;
1760     }
1761     return $str;
1762 }
1763
1764 #
1765 # page_sect - make a URL from the text of a L<>
1766 #
1767 sub page_sect($$) {
1768     my( $page, $section ) = @_;
1769     my( $linktext, $page83, $link);     # work strings
1770
1771     # check if we know that this is a section in this page
1772     if (!defined $Pages{$page} && defined $Sections{$page}) {
1773         $section = $page;
1774         $page = "";
1775         ### print STDERR "reset page='', section=$section\n";
1776     }
1777
1778     $page83=dosify($page);
1779     $page=$page83 if (defined $Pages{$page83});
1780     if ($page eq "") {
1781         $link = "#" . anchorify( $section );
1782     } elsif ( $page =~ /::/ ) {
1783         $page =~ s,::,/,g;
1784         # Search page cache for an entry keyed under the html page name,
1785         # then look to see what directory that page might be in.  NOTE:
1786         # this will only find one page. A better solution might be to produce
1787         # an intermediate page that is an index to all such pages.
1788         my $page_name = $page ;
1789         $page_name =~ s,^.*/,,s ;
1790         if ( defined( $Pages{ $page_name } ) &&
1791              $Pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1792            ) {
1793             $page = $1 ;
1794         }
1795         else {
1796             # NOTE: This branch assumes that all A::B pages are located in
1797             # $Htmlroot/A/B.html . This is often incorrect, since they are
1798             # often in $Htmlroot/lib/A/B.html or such like. Perhaps we could
1799             # analyze the contents of %Pages and figure out where any
1800             # cousins of A::B are, then assume that.  So, if A::B isn't found,
1801             # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1802             # lib/A/B.pm. This is also limited, but it's an improvement.
1803             # Maybe a hints file so that the links point to the correct places
1804             # nonetheless?
1805
1806         }
1807         $link = "$Htmlroot/$page.html";
1808         $link .= "#" . anchorify( $section ) if ($section);
1809     } elsif (!defined $Pages{$page}) {
1810         $link = "";
1811     } else {
1812         $section = anchorify( $section ) if $section ne "";
1813         ### print STDERR "...section=$section\n";
1814
1815         # if there is a directory by the name of the page, then assume that an
1816         # appropriate section will exist in the subdirectory
1817 #       if ($section ne "" && $Pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1818         if ($section ne "" && $Pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
1819             $link = "$Htmlroot/$1/$section.html";
1820             ### print STDERR "...link=$link\n";
1821
1822         # since there is no directory by the name of the page, the section will
1823         # have to exist within a .html of the same name.  thus, make sure there
1824         # is a .pod or .pm that might become that .html
1825         } else {
1826             $section = "#$section" if $section;
1827             ### print STDERR "...section=$section\n";
1828
1829             # check if there is a .pod with the page name.
1830             # for L<Foo>, Foo.(pod|pm) is preferred to A/Foo.(pod|pm)
1831             if ($Pages{$page} =~ /([^:]*)\.(?:pod|pm):/) {
1832                 $link = "$Htmlroot/$1.html$section";
1833             } else {
1834                 $link = "";
1835             }
1836         }
1837     }
1838
1839     if ($link) {
1840         # Here, we take advantage of the knowledge that $Htmlfileurl ne ''
1841         # implies $Htmlroot eq ''. This means that the link in question
1842         # needs a prefix of $Htmldir if it begins with '/'. The test for
1843         # the initial '/' is done to avoid '#'-only links, and to allow
1844         # for other kinds of links, like file:, ftp:, etc.
1845         my $url ;
1846         if (  $Htmlfileurl ne '' ) {
1847             $link = "$Htmldir$link" if $link =~ m{^/}s;
1848             $url = relativize_url( $link, $Htmlfileurl );
1849 # print( "  b: [$link,$Htmlfileurl,$url]\n" );
1850         }
1851         else {
1852             $url = $link ;
1853         }
1854         return $url;
1855
1856     } else {
1857         return undef();
1858     }
1859 }
1860
1861 #
1862 # relativize_url - convert an absolute URL to one relative to a base URL.
1863 # Assumes both end in a filename.
1864 #
1865 sub relativize_url {
1866     my ($dest,$source) = @_ ;
1867
1868     my ($dest_volume,$dest_directory,$dest_file) =
1869         File::Spec::Unix->splitpath( $dest ) ;
1870     $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1871
1872     my ($source_volume,$source_directory,$source_file) =
1873         File::Spec::Unix->splitpath( $source ) ;
1874     $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1875
1876     my $rel_path = '' ;
1877     if ( $dest ne '' ) {
1878        $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1879     }
1880
1881     if ( $rel_path ne ''                &&
1882          substr( $rel_path, -1 ) ne '/' &&
1883          substr( $dest_file, 0, 1 ) ne '#'
1884         ) {
1885         $rel_path .= "/$dest_file" ;
1886     }
1887     else {
1888         $rel_path .= "$dest_file" ;
1889     }
1890
1891     return $rel_path ;
1892 }
1893
1894
1895 #
1896 # coderef - make URL from the text of a C<>
1897 #
1898 sub coderef($$){
1899     my( $page, $item ) = @_;
1900     my( $url );
1901
1902     my $fid = fragment_id( $item );
1903     if( defined( $page ) && $page ne "" ){
1904         # we have been given a $page...
1905         $page =~ s{::}{/}g;
1906
1907         # Do we take it? Item could be a section!
1908         my $base = $Items{$fid} || "";
1909         $base =~ s{[^/]*/}{};
1910         if( $base ne "$page.html" ){
1911             ###   print STDERR "coderef( $page, $item ): items{$fid} = $Items{$fid} = $base => discard page!\n";
1912             $page = undef();
1913         }
1914
1915     } else {
1916         # no page - local items precede cached items
1917         if( defined( $fid ) ){
1918             if(  exists $Local_Items{$fid} ){
1919                 $page = $Local_Items{$fid};
1920             } else {
1921                 $page = $Items{$fid};
1922             }
1923         }
1924     }
1925
1926     # if there was a pod file that we found earlier with an appropriate
1927     # =item directive, then create a link to that page.
1928     if( defined $page ){
1929         if( $page ){
1930             if( exists $Pages{$page} and $Pages{$page} =~ /([^:.]*)\.[^:]*:/){
1931                 $page = $1 . '.html';
1932             }
1933             my $link = "$Htmlroot/$page#item_" . anchorify($fid);
1934
1935             # Here, we take advantage of the knowledge that $Htmlfileurl
1936             # ne '' implies $Htmlroot eq ''.
1937             if (  $Htmlfileurl ne '' ) {
1938                 $link = "$Htmldir$link" ;
1939                 $url = relativize_url( $link, $Htmlfileurl ) ;
1940             } else {
1941                 $url = $link ;
1942             }
1943         } else {
1944             $url = "#item_" . anchorify($fid);
1945         }
1946
1947         confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1948     }
1949     return( $url, $fid );
1950 }
1951
1952
1953
1954 #
1955 # Adapted from Nick Ing-Simmons' PodToHtml package.
1956 sub relative_url {
1957     my $source_file = shift ;
1958     my $destination_file = shift;
1959
1960     my $source = URI::file->new_abs($source_file);
1961     my $uo = URI::file->new($destination_file,$source)->abs;
1962     return $uo->rel->as_string;
1963 }
1964
1965
1966 #
1967 # finish_list - finish off any pending HTML lists.  this should be called
1968 # after the entire pod file has been read and converted.
1969 #
1970 sub finish_list {
1971     while ($Listlevel > 0) {
1972         print HTML "</dl>\n";
1973         $Listlevel--;
1974     }
1975 }
1976
1977 #
1978 # htmlify - converts a pod section specification to a suitable section
1979 # specification for HTML. Note that we keep spaces and special characters
1980 # except ", ? (Netscape problem) and the hyphen (writer's problem...).
1981 #
1982 sub htmlify {
1983     my( $heading) = @_;
1984     $heading =~ s/(\s+)/ /g;
1985     $heading =~ s/\s+\Z//;
1986     $heading =~ s/\A\s+//;
1987     # The hyphen is a disgrace to the English language.
1988     $heading =~ s/[-"?]//g;
1989     $heading = lc( $heading );
1990     return $heading;
1991 }
1992
1993 #
1994 # similar to htmlify, but turns non-alphanumerics into underscores
1995 #
1996 sub anchorify {
1997     my ($anchor) = @_;
1998     $anchor = htmlify($anchor);
1999     $anchor =~ s/\W/_/g;
2000     return $anchor;
2001 }
2002
2003 #
2004 # depod - convert text by eliminating all interior sequences
2005 # Note: can be called with copy or modify semantics
2006 #
2007 my %E2c;
2008 $E2c{lt}     = '<';
2009 $E2c{gt}     = '>';
2010 $E2c{sol}    = '/';
2011 $E2c{verbar} = '|';
2012 $E2c{amp}    = '&'; # in Tk's pods
2013
2014 sub depod1($;$$);
2015
2016 sub depod($){
2017     my $string;
2018     if( ref( $_[0] ) ){
2019         $string =  ${$_[0]};
2020         ${$_[0]} = depod1( \$string );
2021     } else {
2022         $string =  $_[0];
2023         depod1( \$string );
2024     }
2025 }
2026
2027 sub depod1($;$$){
2028   my( $rstr, $func, $closing ) = @_;
2029   my $res = '';
2030   return $res unless defined $$rstr;
2031   if( ! defined( $func ) ){
2032       # skip to next begin of an interior sequence
2033       while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?//s ){
2034          # recurse into its text
2035           $res .= $1 . depod1( $rstr, $2, closing $3);
2036       }
2037       $res .= $$rstr;
2038   } elsif( $func eq 'E' ){
2039       # E<x> - convert to character
2040       $$rstr =~ s/^([^>]*)>//;
2041       $res .= $E2c{$1} || "";
2042   } elsif( $func eq 'X' ){
2043       # X<> - ignore
2044       $$rstr =~ s/^[^>]*>//;
2045   } elsif( $func eq 'Z' ){
2046       # Z<> - empty
2047       $$rstr =~ s/^>//;
2048   } else {
2049       # all others: either recurse into new function or
2050       # terminate at closing angle bracket
2051       my $term = pattern $closing;
2052       while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2053           $res .= $1;
2054           last unless $3;
2055           $res .= depod1( $rstr, $3, closing $4 );
2056       }
2057       ## If we're here and $2 ne '>': undelimited interior sequence.
2058       ## Ignored, as this is called without proper indication of where we are.
2059       ## Rely on process_text to produce diagnostics.
2060   }
2061   return $res;
2062 }
2063
2064 #
2065 # fragment_id - construct a fragment identifier from:
2066 #   a) =item text
2067 #   b) contents of C<...>
2068 #
2069 my @HC;
2070 sub fragment_id {
2071     my $text = shift();
2072     $text =~ s/\s+\Z//s;
2073     if( $text ){
2074         # a method or function?
2075         return $1 if $text =~ /(\w+)\s*\(/;
2076         return $1 if $text =~ /->\s*(\w+)\s*\(?/;
2077
2078         # a variable name?
2079         return $1 if $text =~ /^([\$\@%*]\S+)/;
2080
2081         # some pattern matching operator?
2082         return $1 if $text =~ m|^(\w+/).*/\w*$|;
2083
2084         # fancy stuff... like "do { }"
2085         return $1 if $text =~ m|^(\w+)\s*{.*}$|;
2086
2087         # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
2088         # and some funnies with ... Module ...
2089         return $1 if $text =~ m{^([a-z\d_]+)(\s+[A-Z,/& ][A-Z\d,/& ]*)?$};
2090         return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2091
2092         # text? normalize!
2093         $text =~ s/\s+/_/sg;
2094         $text =~ s{(\W)}{
2095          defined( $HC[ord($1)] ) ? $HC[ord($1)]
2096                  : ( $HC[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
2097         $text = substr( $text, 0, 50 );
2098     } else {
2099         return undef();
2100     }
2101 }
2102
2103 #
2104 # make_URL_href - generate HTML href from URL
2105 # Special treatment for CGI queries.
2106 #
2107 sub make_URL_href($){
2108     my( $url ) = @_;
2109     if( $url !~
2110         s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<a href="$1$2">$1</a>}i ){
2111         $url = "<a href=\"$url\">$url</a>";
2112     }
2113     return $url;
2114 }
2115
2116 1;