YA resync with mainstem, including VMS patches from others
[p5sagit/p5-mst-13.2.git] / lib / Pod / Html.pm
CommitLineData
54310121 1package Pod::Html;
0e4548d5 2use strict;
54310121 3require Exporter;
0e4548d5 4
5use vars qw($VERSION @ISA @EXPORT);
2a28b791 6$VERSION = 1.03;
0e4548d5 7@ISA = qw(Exporter);
54310121 8@EXPORT = qw(pod2html htmlify);
54310121 9
10use Carp;
0e4548d5 11use Config;
12use Cwd;
13use File::Spec::Unix;
14use Getopt::Long;
15use Pod::Functions;
54310121 16
3ec07288 17use locale; # make \w work right in non-ASCII lands
18
54310121 19=head1 NAME
20
7b8d334a 21Pod::Html - module to convert pod files to HTML
54310121 22
23=head1 SYNOPSIS
24
7b8d334a 25 use Pod::Html;
54310121 26 pod2html([options]);
27
28=head1 DESCRIPTION
29
30Converts files from pod format (see L<perlpod>) to HTML format. It
31can automatically generate indexes and cross-references, and it keeps
32a cache of things it knows how to cross-reference.
33
34=head1 ARGUMENTS
35
36Pod::Html takes the following arguments:
37
38=over 4
39
0e4548d5 40=item backlink
41
42 --backlink="Back to Top"
43
44Adds "Back to Top" links in front of every HEAD1 heading (except for
45the first). By default, no backlink are being generated.
46
47=item css
48
49 --css=stylesheet
50
51Specify the URL of a cascading style sheet.
52
53=item flush
54
55 --flush
56
57Flushes the item and directory caches.
58
59=item header
60
61 --header
62 --noheader
63
64Creates header and footer blocks containing the text of the NAME
65section. By default, no headers are being generated.
66
54310121 67=item help
68
69 --help
70
71Displays the usage message.
72
5a039dd3 73=item htmldir
74
75 --htmldir=name
76
77Sets the directory in which the resulting HTML file is placed. This
29f227c9 78is used to generate relative links to other files. Not passing this
79causes all links to be absolute, since this is the value that tells
80Pod::Html the root of the documentation tree.
5a039dd3 81
54310121 82=item htmlroot
83
84 --htmlroot=name
85
86Sets the base URL for the HTML files. When cross-references are made,
87the HTML root is prepended to the URL.
88
0e4548d5 89=item index
90
91 --index
92 --noindex
93
94Generate an index at the top of the HTML file. This is the default
95behaviour.
96
54310121 97=item infile
98
99 --infile=name
100
101Specify the pod file to convert. Input is taken from STDIN if no
102infile is specified.
103
54310121 104=item libpods
105
106 --libpods=name:...:name
107
108List of page names (eg, "perlfunc") which contain linkable C<=item>s.
109
110=item netscape
111
112 --netscape
0e4548d5 113 --nonetscape
54310121 114
0e4548d5 115Use Netscape HTML directives when applicable. By default, they will
116B<not> be used.
54310121 117
0e4548d5 118=item outfile
54310121 119
0e4548d5 120 --outfile=name
54310121 121
0e4548d5 122Specify the HTML file to create. Output goes to STDOUT if no outfile
123is specified.
54310121 124
0e4548d5 125=item podpath
54310121 126
0e4548d5 127 --podpath=name:...:name
54310121 128
0e4548d5 129Specify which subdirectories of the podroot contain pod files whose
130HTML converted forms can be linked-to in cross-references.
54310121 131
0e4548d5 132=item podroot
54310121 133
0e4548d5 134 --podroot=name
54310121 135
0e4548d5 136Specify the base directory for finding library pods.
54310121 137
0e4548d5 138=item quiet
54310121 139
0e4548d5 140 --quiet
141 --noquiet
54310121 142
0e4548d5 143Don't display I<mostly harmless> warning messages. These messages
144will be displayed by default. But this is not the same as C<verbose>
145mode.
54310121 146
0e4548d5 147=item recurse
54310121 148
0e4548d5 149 --recurse
54310121 150 --norecurse
151
0e4548d5 152Recurse into subdirectories specified in podpath (default behaviour).
54310121 153
154=item title
155
156 --title=title
157
158Specify the title of the resulting HTML file.
159
160=item verbose
161
162 --verbose
0e4548d5 163 --noverbose
54310121 164
0e4548d5 165Display progress messages. By default, they won't be displayed.
34db337b 166
54310121 167=back
168
169=head1 EXAMPLE
170
171 pod2html("pod2html",
172 "--podpath=lib:ext:pod:vms",
173 "--podroot=/usr/src/perl",
174 "--htmlroot=/perl/nmanual",
175 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
176 "--recurse",
177 "--infile=foo.pod",
178 "--outfile=/perl/nmanual/foo.html");
179
34db337b 180=head1 ENVIRONMENT
181
182Uses $Config{pod2html} to setup default options.
183
54310121 184=head1 AUTHOR
185
186Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
187
54310121 188=head1 SEE ALSO
189
190L<perlpod>
191
192=head1 COPYRIGHT
193
194This program is distributed under the Artistic License.
195
196=cut
197
1f763251 198my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
199my $dircache = "pod2htmd$cache_ext";
200my $itemcache = "pod2htmi$cache_ext";
54310121 201
202my @begin_stack = (); # begin/end stack
203
29f227c9 204my @libpods = (); # files to search for links from C<> directives
205my $htmlroot = "/"; # http-server base directory from which all
54310121 206 # relative paths in $podpath stem.
5a039dd3 207my $htmldir = ""; # The directory to which the html pages
208 # will (eventually) be written.
54310121 209my $htmlfile = ""; # write to stdout by default
29f227c9 210my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3 211 # refer to this file. This is only used
212 # to make relative urls that point to
213 # other files.
54310121 214my $podfile = ""; # read from stdin by default
215my @podpath = (); # list of directories containing library pods.
216my $podroot = "."; # filesystem base directory from which all
217 # relative paths in $podpath stem.
34db337b 218my $css = ''; # Cascading style sheet
54310121 219my $recurse = 1; # recurse on subdirectories in $podpath.
34db337b 220my $quiet = 0; # not quiet by default
54310121 221my $verbose = 0; # not verbose by default
222my $doindex = 1; # non-zero if we should generate an index
0e4548d5 223my $backlink = ''; # text for "back to top" links
54310121 224my $listlevel = 0; # current list depth
2a28b791 225my @listend = (); # the text to use to end the list.
226my $after_lpar = 0; # set to true after a par in an =item
54310121 227my $ignore = 1; # whether or not to format text. we don't
228 # format text until we hit our first pod
229 # directive.
230
231my %items_named = (); # for the multiples of the same item in perlfunc
232my @items_seen = ();
233my $netscape = 0; # whether or not to use netscape directives.
234my $title; # title to give the pod(s)
34db337b 235my $header = 0; # produce block header/footer
54310121 236my $top = 1; # true if we are at the top of the doc. used
237 # to prevent the first <HR> directive.
238my $paragraph; # which paragraph we're processing (used
239 # for error messages)
2a28b791 240my $ptQuote = 0; # status of double-quote conversion
54310121 241my %pages = (); # associative array used to find the location
242 # of pages referenced by L<> links.
243my %sections = (); # sections within this page
244my %items = (); # associative array used to find the location
245 # of =item directives referenced by C<> links
2a28b791 246my %local_items = (); # local items - avoid destruction of %items
39e571d4 247my $Is83; # is dos with short filenames (8.3)
248
54310121 249sub init_globals {
04f720db 250$dircache = "pod2htmd$cache_ext";
251$itemcache = "pod2htmi$cache_ext";
54310121 252
253@begin_stack = (); # begin/end stack
254
255@libpods = (); # files to search for links from C<> directives
256$htmlroot = "/"; # http-server base directory from which all
257 # relative paths in $podpath stem.
67398a75 258$htmldir = ""; # The directory to which the html pages
259 # will (eventually) be written.
54310121 260$htmlfile = ""; # write to stdout by default
261$podfile = ""; # read from stdin by default
262@podpath = (); # list of directories containing library pods.
263$podroot = "."; # filesystem base directory from which all
264 # relative paths in $podpath stem.
34db337b 265$css = ''; # Cascading style sheet
54310121 266$recurse = 1; # recurse on subdirectories in $podpath.
34db337b 267$quiet = 0; # not quiet by default
54310121 268$verbose = 0; # not verbose by default
269$doindex = 1; # non-zero if we should generate an index
0e4548d5 270$backlink = ''; # text for "back to top" links
54310121 271$listlevel = 0; # current list depth
2a28b791 272@listend = (); # the text to use to end the list.
273$after_lpar = 0; # set to true after a par in an =item
54310121 274$ignore = 1; # whether or not to format text. we don't
275 # format text until we hit our first pod
276 # directive.
277
278@items_seen = ();
279%items_named = ();
280$netscape = 0; # whether or not to use netscape directives.
34db337b 281$header = 0; # produce block header/footer
54310121 282$title = ''; # title to give the pod(s)
283$top = 1; # true if we are at the top of the doc. used
284 # to prevent the first <HR> directive.
285$paragraph = ''; # which paragraph we're processing (used
286 # for error messages)
54310121 287%sections = (); # sections within this page
3e3baf6d 288
289# These are not reinitialised here but are kept as a cache.
290# See get_cache and related cache management code.
291#%pages = (); # associative array used to find the location
292 # of pages referenced by L<> links.
293#%items = (); # associative array used to find the location
54310121 294 # of =item directives referenced by C<> links
2a28b791 295%local_items = ();
39e571d4 296$Is83=$^O eq 'dos';
54310121 297}
298
2a28b791 299#
300# clean_data: global clean-up of pod data
301#
302sub clean_data($){
303 my( $dataref ) = @_;
304 my $i;
305 for( $i = 0; $i <= $#$dataref; $i++ ){
306 ${$dataref}[$i] =~ s/\s+\Z//;
307
308 # have a look for all-space lines
309 if( ${$dataref}[$i] =~ /^\s+$/m ){
310 my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
311 splice( @$dataref, $i, 1, @chunks );
312 }
313 }
314}
315
316
54310121 317sub pod2html {
318 local(@ARGV) = @_;
319 local($/);
320 local $_;
321
322 init_globals();
323
39e571d4 324 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
325
54310121 326 # cache of %pages and %items from last time we ran pod2html
54310121 327
328 #undef $opt_help if defined $opt_help;
329
330 # parse the command-line parameters
331 parse_command_line();
332
333 # set some variables to their default values if necessary
334 local *POD;
335 unless (@ARGV && $ARGV[0]) {
336 $podfile = "-" unless $podfile; # stdin
337 open(POD, "<$podfile")
338 || die "$0: cannot open $podfile file for input: $!\n";
339 } else {
340 $podfile = $ARGV[0]; # XXX: might be more filenames
341 *POD = *ARGV;
342 }
343 $htmlfile = "-" unless $htmlfile; # stdout
344 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
fe6f1558 345 $htmldir =~ s#/\z## ; # so we don't get a //
29f227c9 346 if ( $htmlroot eq ''
347 && defined( $htmldir )
348 && $htmldir ne ''
349 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
350 )
5a039dd3 351 {
29f227c9 352 # Set the 'base' url for this file, so that we can use it
353 # as the location from which to calculate relative links
354 # to other files. If this is '', then absolute links will
355 # be used throughout.
356 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 357 }
54310121 358
359 # read the pod a paragraph at a time
360 warn "Scanning for sections in input file(s)\n" if $verbose;
361 $/ = "";
362 my @poddata = <POD>;
363 close(POD);
2a28b791 364 clean_data( \@poddata );
54310121 365
366 # scan the pod for =head[1-6] directives and build an index
367 my $index = scan_headings(\%sections, @poddata);
368
3e3baf6d 369 unless($index) {
31e56455 370 warn "No headings in $podfile\n" if $verbose;
3e3baf6d 371 }
372
54310121 373 # open the output file
374 open(HTML, ">$htmlfile")
375 || die "$0: cannot open $htmlfile file for output: $!\n";
376
d011ffae 377 # put a title in the HTML file if one wasn't specified
378 if ($title eq '') {
379 TITLE_SEARCH: {
380 for (my $i = 0; $i < @poddata; $i++) {
381 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
382 for my $para ( @poddata[$i, $i+1] ) {
383 last TITLE_SEARCH
384 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
385 }
386 }
54310121 387
d011ffae 388 }
389 }
390 }
fe6f1558 391 if (!$title and $podfile =~ /\.pod\z/) {
3e3baf6d 392 # probably a split pod so take first =head[12] as title
393 for (my $i = 0; $i < @poddata; $i++) {
394 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
395 }
396 warn "adopted '$title' as title for $podfile\n"
397 if $verbose and $title;
398 }
7b8d334a 399 if ($title) {
400 $title =~ s/\s*\(.*\)//;
401 } else {
34db337b 402 warn "$0: no title for $podfile" unless $quiet;
fe6f1558 403 $podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
54310121 404 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 405 warn "using $title" if $verbose;
54310121 406 }
34db337b 407 my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
408 $csslink =~ s,\\,/,g;
409 $csslink =~ s,(/.):,$1|,;
410
411 my $block = $header ? <<END_OF_BLOCK : '';
412<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
413<TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
414<FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
415</TD></TR>
416</TABLE>
417END_OF_BLOCK
418
54310121 419 print HTML <<END_OF_HEAD;
7b8d334a 420<HTML>
421<HEAD>
34db337b 422<TITLE>$title</TITLE>$csslink
7b8d334a 423<LINK REV="made" HREF="mailto:$Config{perladmin}">
424</HEAD>
54310121 425
7b8d334a 426<BODY>
34db337b 427$block
54310121 428END_OF_HEAD
429
3e3baf6d 430 # load/reload/validate/cache %pages and %items
431 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121 432
433 # scan the pod for =item directives
2a28b791 434 scan_items( \%local_items, "", @poddata);
54310121 435
436 # put an index at the top of the file. note, if $doindex is 0 we
437 # still generate an index, but surround it with an html comment.
438 # that way some other program can extract it if desired.
439 $index =~ s/--+/-/g;
2a28b791 440 print HTML "<A NAME=\"__index__\"></A>\n";
54310121 441 print HTML "<!-- INDEX BEGIN -->\n";
442 print HTML "<!--\n" unless $doindex;
443 print HTML $index;
444 print HTML "-->\n" unless $doindex;
445 print HTML "<!-- INDEX END -->\n\n";
31e56455 446 print HTML "<HR>\n" if $doindex and $index;
54310121 447
448 # now convert this file
2a28b791 449 my $after_item; # set to true after an =item
450 warn "Converting input file $podfile\n" if $verbose;
451 foreach my $i (0..$#poddata){
452 $ptQuote = 0; # status of quote conversion
453
54310121 454 $_ = $poddata[$i];
455 $paragraph = $i+1;
456 if (/^(=.*)/s) { # is it a pod directive?
457 $ignore = 0;
2a28b791 458 $after_item = 0;
54310121 459 $_ = $1;
460 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
461 process_begin($1, $2);
462 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
463 process_end($1, $2);
464 } elsif (/^=cut/) { # =cut
465 process_cut();
466 } elsif (/^=pod/) { # =pod
467 process_pod();
468 } else {
469 next if @begin_stack && $begin_stack[-1] ne 'html';
470
7b8d334a 471 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
2a28b791 472 process_head( $1, $2, $doindex && $index );
473 } elsif (/^=item\s*(.*\S)?/sm) { # =item text
474 warn "$0: $podfile: =item without bullet, number or text"
228a48a5 475 . " in paragraph $paragraph.\n" if !defined($1) or $1 eq '';
2a28b791 476 process_item( $1 );
477 $after_item = 1;
54310121 478 } elsif (/^=over\s*(.*)/) { # =over N
479 process_over();
480 } elsif (/^=back/) { # =back
481 process_back();
a45bd81d 482 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
54310121 483 process_for($1,$2);
484 } else {
485 /^=(\S*)\s*/;
486 warn "$0: $podfile: unknown pod directive '$1' in "
487 . "paragraph $paragraph. ignoring.\n";
488 }
489 }
490 $top = 0;
491 }
492 else {
493 next if $ignore;
494 next if @begin_stack && $begin_stack[-1] ne 'html';
495 my $text = $_;
2a28b791 496 if( $text =~ /\A\s+/ ){
497 process_pre( \$text );
498 print HTML "<PRE>\n$text</PRE>\n";
499
500 } else {
501 process_text( \$text );
502
503 # experimental: check for a paragraph where all lines
504 # have some ...\t...\t...\n pattern
505 if( $text =~ /\t/ ){
506 my @lines = split( "\n", $text );
507 if( @lines > 1 ){
508 my $all = 2;
509 foreach my $line ( @lines ){
510 if( $line =~ /\S/ && $line !~ /\t/ ){
511 $all--;
512 last if $all == 0;
513 }
514 }
515 if( $all > 0 ){
516 $text =~ s/\t+/<TD>/g;
517 $text =~ s/^/<TR><TD>/gm;
518 $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
519 $text . '</TABLE>';
520 }
521 }
522 }
523 ## end of experimental
524
525 if( $after_item ){
526 print HTML "$text\n";
527 $after_lpar = 1;
528 } else {
529 print HTML "<P>$text</P>\n";
530 }
531 }
532 $after_item = 0;
54310121 533 }
534 }
535
536 # finish off any pending directives
537 finish_list();
2a28b791 538
539 # link to page index
0e4548d5 540 print HTML "<P><A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A></P>\n"
541 if $doindex and $index and $backlink;
2a28b791 542
54310121 543 print HTML <<END_OF_TAIL;
34db337b 544$block
7b8d334a 545</BODY>
54310121 546
7b8d334a 547</HTML>
54310121 548END_OF_TAIL
549
550 # close the html file
551 close(HTML);
552
553 warn "Finished\n" if $verbose;
554}
555
556##############################################################################
557
558my $usage; # see below
559sub usage {
560 my $podfile = shift;
561 warn "$0: $podfile: @_\n" if @_;
562 die $usage;
563}
564
565$usage =<<END_OF_USAGE;
566Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
567 --podpath=<name>:...:<name> --podroot=<name>
568 --libpods=<name>:...:<name> --recurse --verbose --index
569 --netscape --norecurse --noindex
570
0e4548d5 571 --backlink - set text for "back to top" links (default: none).
572 --css - stylesheet URL
573 --flush - flushes the item and directory caches.
574 --[no]header - produce block header/footer (default is no headers).
575 --help - prints this message.
576 --htmldir - directory for resulting HTML files.
577 --htmlroot - http-server base directory from which all relative paths
578 in podpath stem (default is /).
579 --[no]index - generate an index at the top of the resulting html
580 (default behaviour).
581 --infile - filename for the pod to convert (input taken from stdin
582 by default).
583 --libpods - colon-separated list of pages to search for =item pod
584 directives in as targets of C<> and implicit links (empty
585 by default). note, these are not filenames, but rather
586 page names like those that appear in L<> links.
587 --[no]netscape - will use netscape html directives when applicable.
588 (default is not to use them).
589 --outfile - filename for the resulting html file (output sent to
590 stdout by default).
591 --podpath - colon-separated list of directories containing library
592 pods (empty by default).
593 --podroot - filesystem base directory from which all relative paths
594 in podpath stem (default is .).
595 --[no]quiet - supress some benign warning messages (default is off).
596 --[no]recurse - recurse on those subdirectories listed in podpath
597 (default behaviour).
598 --title - title that will appear in resulting html file.
599 --[no]verbose - self-explanatory (off by default).
54310121 600
601END_OF_USAGE
602
603sub parse_command_line {
0e4548d5 604 my ($opt_backlink,$opt_css,$opt_flush,$opt_header,$opt_help,$opt_htmldir,
605 $opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,
606 $opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,$opt_recurse,
607 $opt_title,$opt_verbose);
608
34db337b 609 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 610 my $result = GetOptions(
0e4548d5 611 'backlink=s' => \$opt_backlink,
612 'css=s' => \$opt_css,
29f227c9 613 'flush' => \$opt_flush,
0e4548d5 614 'header!' => \$opt_header,
29f227c9 615 'help' => \$opt_help,
616 'htmldir=s' => \$opt_htmldir,
54310121 617 'htmlroot=s' => \$opt_htmlroot,
29f227c9 618 'index!' => \$opt_index,
54310121 619 'infile=s' => \$opt_infile,
620 'libpods=s' => \$opt_libpods,
621 'netscape!' => \$opt_netscape,
622 'outfile=s' => \$opt_outfile,
623 'podpath=s' => \$opt_podpath,
624 'podroot=s' => \$opt_podroot,
0e4548d5 625 'quiet!' => \$opt_quiet,
54310121 626 'recurse!' => \$opt_recurse,
627 'title=s' => \$opt_title,
0e4548d5 628 'verbose!' => \$opt_verbose,
54310121 629 );
630 usage("-", "invalid parameters") if not $result;
631
632 usage("-") if defined $opt_help; # see if the user asked for help
633 $opt_help = ""; # just to make -w shut-up.
634
54310121 635 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
636 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
637
0e4548d5 638 $backlink = $opt_backlink if defined $opt_backlink;
639 $css = $opt_css if defined $opt_css;
640 $header = $opt_header if defined $opt_header;
641 $htmldir = $opt_htmldir if defined $opt_htmldir;
642 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
643 $doindex = $opt_index if defined $opt_index;
644 $podfile = $opt_infile if defined $opt_infile;
645 $netscape = $opt_netscape if defined $opt_netscape;
646 $htmlfile = $opt_outfile if defined $opt_outfile;
647 $podroot = $opt_podroot if defined $opt_podroot;
648 $quiet = $opt_quiet if defined $opt_quiet;
649 $recurse = $opt_recurse if defined $opt_recurse;
650 $title = $opt_title if defined $opt_title;
651 $verbose = $opt_verbose if defined $opt_verbose;
652
54310121 653 warn "Flushing item and directory caches\n"
654 if $opt_verbose && defined $opt_flush;
655 unlink($dircache, $itemcache) if defined $opt_flush;
54310121 656}
657
3e3baf6d 658
659my $saved_cache_key;
660
661sub get_cache {
662 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
663 my @cache_key_args = @_;
664
665 # A first-level cache:
666 # Don't bother reading the cache files if they still apply
667 # and haven't changed since we last read them.
668
669 my $this_cache_key = cache_key(@cache_key_args);
670
671 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
672
673 # load the cache of %pages and %items if possible. $tests will be
674 # non-zero if successful.
675 my $tests = 0;
676 if (-f $dircache && -f $itemcache) {
677 warn "scanning for item cache\n" if $verbose;
678 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
679 }
680
681 # if we didn't succeed in loading the cache then we must (re)build
682 # %pages and %items.
683 if (!$tests) {
684 warn "scanning directories in pod-path\n" if $verbose;
685 scan_podpath($podroot, $recurse, 0);
686 }
687 $saved_cache_key = cache_key(@cache_key_args);
688}
689
690sub cache_key {
691 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
692 return join('!', $dircache, $itemcache, $recurse,
29f227c9 693 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d 694}
695
54310121 696#
3e3baf6d 697# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121 698# are valid caches of %pages and %items. if they are valid then it loads
699# them and returns a non-zero value.
700#
3e3baf6d 701sub load_cache {
54310121 702 my($dircache, $itemcache, $podpath, $podroot) = @_;
703 my($tests);
704 local $_;
705
706 $tests = 0;
707
708 open(CACHE, "<$itemcache") ||
709 die "$0: error opening $itemcache for reading: $!\n";
710 $/ = "\n";
711
712 # is it the same podpath?
713 $_ = <CACHE>;
714 chomp($_);
3e3baf6d 715 $tests++ if (join(":", @$podpath) eq $_);
54310121 716
717 # is it the same podroot?
718 $_ = <CACHE>;
719 chomp($_);
720 $tests++ if ($podroot eq $_);
721
722 # load the cache if its good
723 if ($tests != 2) {
724 close(CACHE);
54310121 725 return 0;
726 }
727
728 warn "loading item cache\n" if $verbose;
729 while (<CACHE>) {
730 /(.*?) (.*)$/;
731 $items{$1} = $2;
732 }
733 close(CACHE);
734
735 warn "scanning for directory cache\n" if $verbose;
736 open(CACHE, "<$dircache") ||
737 die "$0: error opening $dircache for reading: $!\n";
738 $/ = "\n";
739 $tests = 0;
740
741 # is it the same podpath?
742 $_ = <CACHE>;
743 chomp($_);
3e3baf6d 744 $tests++ if (join(":", @$podpath) eq $_);
54310121 745
746 # is it the same podroot?
747 $_ = <CACHE>;
748 chomp($_);
749 $tests++ if ($podroot eq $_);
750
751 # load the cache if its good
752 if ($tests != 2) {
753 close(CACHE);
54310121 754 return 0;
755 }
756
757 warn "loading directory cache\n" if $verbose;
758 while (<CACHE>) {
759 /(.*?) (.*)$/;
760 $pages{$1} = $2;
761 }
762
763 close(CACHE);
764
765 return 1;
766}
767
768#
769# scan_podpath - scans the directories specified in @podpath for directories,
770# .pod files, and .pm files. it also scans the pod files specified in
771# @libpods for =item directives.
772#
773sub scan_podpath {
3e3baf6d 774 my($podroot, $recurse, $append) = @_;
54310121 775 my($pwd, $dir);
776 my($libpod, $dirname, $pod, @files, @poddata);
777
3e3baf6d 778 unless($append) {
779 %items = ();
780 %pages = ();
781 }
782
54310121 783 # scan each directory listed in @podpath
784 $pwd = getcwd();
785 chdir($podroot)
786 || die "$0: error changing to directory $podroot: $!\n";
787 foreach $dir (@podpath) {
788 scan_dir($dir, $recurse);
789 }
790
791 # scan the pods listed in @libpods for =item directives
792 foreach $libpod (@libpods) {
793 # if the page isn't defined then we won't know where to find it
794 # on the system.
795 next unless defined $pages{$libpod} && $pages{$libpod};
796
797 # if there is a directory then use the .pod and .pm files within it.
29f227c9 798 # NOTE: Only finds the first so-named directory in the tree.
799# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
800 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 801 # find all the .pod and .pm files within the directory
802 $dirname = $1;
803 opendir(DIR, $dirname) ||
804 die "$0: error opening directory $dirname: $!\n";
fe6f1558 805 @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
54310121 806 closedir(DIR);
807
808 # scan each .pod and .pm file for =item directives
809 foreach $pod (@files) {
810 open(POD, "<$dirname/$pod") ||
811 die "$0: error opening $dirname/$pod for input: $!\n";
812 @poddata = <POD>;
813 close(POD);
2a28b791 814 clean_data( \@poddata );
54310121 815
2a28b791 816 scan_items( \%items, "$dirname/$pod", @poddata);
54310121 817 }
818
819 # use the names of files as =item directives too.
2a28b791 820### Don't think this should be done this way - confuses issues.(WL)
821### foreach $pod (@files) {
822### $pod =~ /^(.*)(\.pod|\.pm)$/;
823### $items{$1} = "$dirname/$1.html" if $1;
824### }
54310121 825 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
826 $pages{$libpod} =~ /([^:]*\.pm):/) {
827 # scan the .pod or .pm file for =item directives
828 $pod = $1;
829 open(POD, "<$pod") ||
830 die "$0: error opening $pod for input: $!\n";
831 @poddata = <POD>;
832 close(POD);
2a28b791 833 clean_data( \@poddata );
54310121 834
2a28b791 835 scan_items( \%items, "$pod", @poddata);
54310121 836 } else {
837 warn "$0: shouldn't be here (line ".__LINE__."\n";
838 }
839 }
840 @poddata = (); # clean-up a bit
841
842 chdir($pwd)
843 || die "$0: error changing to directory $pwd: $!\n";
844
845 # cache the item list for later use
846 warn "caching items for later use\n" if $verbose;
847 open(CACHE, ">$itemcache") ||
848 die "$0: error open $itemcache for writing: $!\n";
849
850 print CACHE join(":", @podpath) . "\n$podroot\n";
851 foreach my $key (keys %items) {
852 print CACHE "$key $items{$key}\n";
853 }
854
855 close(CACHE);
856
857 # cache the directory list for later use
858 warn "caching directories for later use\n" if $verbose;
859 open(CACHE, ">$dircache") ||
860 die "$0: error open $dircache for writing: $!\n";
861
862 print CACHE join(":", @podpath) . "\n$podroot\n";
863 foreach my $key (keys %pages) {
864 print CACHE "$key $pages{$key}\n";
865 }
866
867 close(CACHE);
868}
869
870#
871# scan_dir - scans the directory specified in $dir for subdirectories, .pod
872# files, and .pm files. notes those that it finds. this information will
873# be used later in order to figure out where the pages specified in L<>
874# links are on the filesystem.
875#
876sub scan_dir {
877 my($dir, $recurse) = @_;
878 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
879 local $_;
880
881 @subdirs = ();
882 @pods = ();
883
884 opendir(DIR, $dir) ||
885 die "$0: error opening directory $dir: $!\n";
886 while (defined($_ = readdir(DIR))) {
887 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
888 $pages{$_} = "" unless defined $pages{$_};
889 $pages{$_} .= "$dir/$_:";
890 push(@subdirs, $_);
fe6f1558 891 } elsif (/\.pod\z/) { # .pod
892 s/\.pod\z//;
54310121 893 $pages{$_} = "" unless defined $pages{$_};
894 $pages{$_} .= "$dir/$_.pod:";
895 push(@pods, "$dir/$_.pod");
fe6f1558 896 } elsif (/\.pm\z/) { # .pm
897 s/\.pm\z//;
54310121 898 $pages{$_} = "" unless defined $pages{$_};
899 $pages{$_} .= "$dir/$_.pm:";
900 push(@pods, "$dir/$_.pm");
901 }
902 }
903 closedir(DIR);
904
905 # recurse on the subdirectories if necessary
906 if ($recurse) {
907 foreach my $subdir (@subdirs) {
908 scan_dir("$dir/$subdir", $recurse);
909 }
910 }
911}
912
913#
914# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
915# build an index.
916#
917sub scan_headings {
918 my($sections, @data) = @_;
2a28b791 919 my($tag, $which_head, $otitle, $listdepth, $index);
54310121 920
be173d55 921 # here we need local $ignore = 0;
922 # unfortunately, we can't have it, because $ignore is lexical
923 $ignore = 0;
924
54310121 925 $listdepth = 0;
926 $index = "";
927
928 # scan for =head directives, note their name, and build an index
929 # pointing to each of them.
930 foreach my $line (@data) {
bb9460ed 931 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
2a28b791 932 ($tag, $which_head, $otitle) = ($1,$2,$3);
933
934 my $title = depod( $otitle );
935 my $name = htmlify( $title );
936 $$sections{$name} = 1;
937 $title = process_text( \$otitle );
54310121 938
102c538a 939 while ($which_head != $listdepth) {
940 if ($which_head > $listdepth) {
941 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
942 $listdepth++;
943 } elsif ($which_head < $listdepth) {
944 $listdepth--;
945 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
946 }
54310121 947 }
54310121 948
949 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
2a28b791 950 "<A HREF=\"#" . $name . "\">" .
951 $title . "</A></LI>";
54310121 952 }
953 }
954
955 # finish off the lists
956 while ($listdepth--) {
957 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
958 }
959
960 # get rid of bogus lists
961 $index =~ s,\t*<UL>\s*</UL>\n,,g;
962
bb9460ed 963 $ignore = 1; # restore old value;
be173d55 964
54310121 965 return $index;
966}
967
968#
969# scan_items - scans the pod specified by $pod for =item directives. we
970# will use this information later on in resolving C<> links.
971#
972sub scan_items {
2a28b791 973 my( $itemref, $pod, @poddata ) = @_;
54310121 974 my($i, $item);
975 local $_;
976
fe6f1558 977 $pod =~ s/\.pod\z//;
54310121 978 $pod .= ".html" if $pod;
979
980 foreach $i (0..$#poddata) {
2a28b791 981 my $txt = depod( $poddata[$i] );
982
983 # figure out what kind of item it is.
984 # Build string for referencing this item.
985 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
986 next unless $1;
987 $item = $1;
988 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
989 $item = $1;
990 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
991 $item = $1;
992 } else {
993 next;
54310121 994 }
2a28b791 995 my $fid = fragment_id( $item );
996 $$itemref{$fid} = "$pod" if $fid;
54310121 997 }
998}
999
1000#
1001# process_head - convert a pod head[1-6] tag and convert it to HTML format.
1002#
1003sub process_head {
2a28b791 1004 my($tag, $heading, $hasindex) = @_;
54310121 1005
1006 # figure out the level of the =head
1007 $tag =~ /head([1-6])/;
1008 my $level = $1;
1009
2a28b791 1010 if( $listlevel ){
1011 warn "$0: $podfile: unterminated list at =head in paragraph $paragraph. ignoring.\n";
1012 while( $listlevel ){
1013 process_back();
1014 }
1015 }
1016
1017 print HTML "<P>\n";
1018 if( $level == 1 && ! $top ){
0e4548d5 1019 print HTML "<A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A>\n"
1020 if $hasindex and $backlink;
2a28b791 1021 print HTML "<HR>\n"
1022 }
1023
1024 my $name = htmlify( depod( $heading ) );
1025 my $convert = process_text( \$heading );
1026 print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
54310121 1027}
1028
2a28b791 1029
54310121 1030#
2a28b791 1031# emit_item_tag - print an =item's text
1032# Note: The global $EmittedItem is used for inhibiting self-references.
54310121 1033#
2a28b791 1034my $EmittedItem;
1035
1036sub emit_item_tag($$$){
1037 my( $otext, $text, $compact ) = @_;
1038 my $item = fragment_id( $text );
54310121 1039
2a28b791 1040 $EmittedItem = $item;
1041 ### print STDERR "emit_item_tag=$item ($text)\n";
54310121 1042
2a28b791 1043 print HTML '<STRONG>';
1044 if ($items_named{$item}++) {
1045 print HTML process_text( \$otext );
1046 } else {
1047 my $name = 'item_' . $item;
1048 print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';
1049 }
1050 print HTML "</STRONG><BR>\n";
1051 undef( $EmittedItem );
1052}
1053
1054sub emit_li {
1055 my( $tag ) = @_;
1056 if( $items_seen[$listlevel]++ == 0 ){
1057 push( @listend, "</$tag>" );
1058 print HTML "<$tag>\n";
1059 }
1060 print HTML $tag eq 'DL' ? '<DT>' : '<LI>';
1061}
1062
1063#
1064# process_item - convert a pod item tag and convert it to HTML format.
1065#
1066sub process_item {
1067 my( $otext ) = @_;
54310121 1068
1069 # lots of documents start a list without doing an =over. this is
1070 # bad! but, the proper thing to do seems to be to just assume
1071 # they did do an =over. so warn them once and then continue.
2a28b791 1072 if( $listlevel == 0 ){
1073 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n";
1074 process_over();
1075 }
54310121 1076
2a28b791 1077 # formatting: insert a paragraph if preceding item has >1 paragraph
1078 if( $after_lpar ){
1079 print HTML "<P></P>\n";
1080 $after_lpar = 0;
1081 }
54310121 1082
1083 # remove formatting instructions from the text
2a28b791 1084 my $text = depod( $otext );
1085
1086 # all the list variants:
1087 if( $text =~ /\A\*/ ){ # bullet
1088 emit_li( 'UL' );
1089 if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1090 my $tag = $1;
1091 $otext =~ s/\A\*\s+//;
1092 emit_item_tag( $otext, $tag, 1 );
54310121 1093 }
1094
2a28b791 1095 } elsif( $text =~ /\A\d+/ ){ # numbered list
1096 emit_li( 'OL' );
1097 if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1098 my $tag = $1;
1099 $otext =~ s/\A\d+\.?\s*//;
1100 emit_item_tag( $otext, $tag, 1 );
7b8d334a 1101 }
54310121 1102
2a28b791 1103 } else { # definition list
1104 emit_li( 'DL' );
1105 if ($text =~ /\A(.+)\Z/s ){ # should have text
1106 emit_item_tag( $otext, $text, 1 );
7b8d334a 1107 }
54310121 1108 print HTML '<DD>';
1109 }
54310121 1110 print HTML "\n";
1111}
1112
1113#
2a28b791 1114# process_over - process a pod over tag and start a corresponding HTML list.
54310121 1115#
1116sub process_over {
1117 # start a new list
1118 $listlevel++;
2a28b791 1119 push( @items_seen, 0 );
1120 $after_lpar = 0;
54310121 1121}
1122
1123#
1124# process_back - process a pod back tag and convert it to HTML format.
1125#
1126sub process_back {
2a28b791 1127 if( $listlevel == 0 ){
1128 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n";
1129 return;
1130 }
54310121 1131
1132 # close off the list. note, I check to see if $listend[$listlevel] is
1133 # defined because an =item directive may have never appeared and thus
1134 # $listend[$listlevel] may have never been initialized.
1135 $listlevel--;
2a28b791 1136 if( defined $listend[$listlevel] ){
1137 print HTML '<P></P>' if $after_lpar;
1138 print HTML $listend[$listlevel];
1139 print HTML "\n";
1140 pop( @listend );
1141 }
1142 $after_lpar = 0;
54310121 1143
2a28b791 1144 # clean up item count
1145 pop( @items_seen );
54310121 1146}
1147
1148#
2a28b791 1149# process_cut - process a pod cut tag, thus start ignoring pod directives.
54310121 1150#
1151sub process_cut {
1152 $ignore = 1;
1153}
1154
1155#
2a28b791 1156# process_pod - process a pod pod tag, thus stop ignoring pod directives
1157# until we see a corresponding cut.
54310121 1158#
1159sub process_pod {
1160 # no need to set $ignore to 0 cause the main loop did it
1161}
1162
1163#
2a28b791 1164# process_for - process a =for pod tag. if it's for html, spit
c4d9b39d 1165# it out verbatim, if illustration, center it, otherwise ignore it.
54310121 1166#
1167sub process_for {
1168 my($whom, $text) = @_;
1169 if ( $whom =~ /^(pod2)?html$/i) {
1170 print HTML $text;
c4d9b39d 1171 } elsif ($whom =~ /^illustration$/i) {
1172 1 while chomp $text;
1173 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1174 $text .= $ext, last if -r "$text$ext";
1175 }
1176 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1177 }
54310121 1178}
1179
1180#
1181# process_begin - process a =begin pod tag. this pushes
1182# whom we're beginning on the begin stack. if there's a
1183# begin stack, we only print if it us.
1184#
1185sub process_begin {
1186 my($whom, $text) = @_;
1187 $whom = lc($whom);
1188 push (@begin_stack, $whom);
1189 if ( $whom =~ /^(pod2)?html$/) {
1190 print HTML $text if $text;
1191 }
1192}
1193
1194#
1195# process_end - process a =end pod tag. pop the
1196# begin stack. die if we're mismatched.
1197#
1198sub process_end {
1199 my($whom, $text) = @_;
1200 $whom = lc($whom);
1201 if ($begin_stack[-1] ne $whom ) {
1202 die "Unmatched begin/end at chunk $paragraph\n"
1203 }
2a28b791 1204 pop( @begin_stack );
54310121 1205}
1206
1207#
2a28b791 1208# process_pre - indented paragraph, made into <PRE></PRE>
54310121 1209#
2a28b791 1210sub process_pre {
1211 my( $text ) = @_;
1212 my( $rest );
54310121 1213 return if $ignore;
1214
54310121 1215 $rest = $$text;
1216
2a28b791 1217 # insert spaces in place of tabs
1218 $rest =~ s#.*#
be173d55 1219 my $line = $&;
1220 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1221 $line;
1222 #eg;
54310121 1223
2a28b791 1224 # convert some special chars to HTML escapes
1225 $rest =~ s/&/&amp;/g;
1226 $rest =~ s/</&lt;/g;
1227 $rest =~ s/>/&gt;/g;
1228 $rest =~ s/"/&quot;/g;
1229
1230 # try and create links for all occurrences of perl.* within
1231 # the preformatted text.
1232 $rest =~ s{
1233 (\s*)(perl\w+)
1234 }{
1235 if ( defined $pages{$2} ){ # is a link
1236 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1237 } elsif (defined $pages{dosify($2)}) { # is a link
1238 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1239 } else {
1240 "$1$2";
1241 }
1242 }xeg;
1243 $rest =~ s{
1244 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1245 }{
1246 my $url ;
1247 if ( $htmlfileurl ne '' ){
1248 # Here, we take advantage of the knowledge
1249 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1250 # Since $htmlroot eq '', we need to prepend $htmldir
1251 # on the fron of the link to get the absolute path
1252 # of the link's target. We check for a leading '/'
1253 # to avoid corrupting links that are #, file:, etc.
1254 my $old_url = $3 ;
1255 $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1256 $url = relativize_url( "$old_url.html", $htmlfileurl );
1257 } else {
1258 $url = "$3.html" ;
1259 }
1260 "$1$url" ;
1261 }xeg;
1262
1263 # Look for embedded URLs and make them into links. We don't
1264 # relativize them since they are best left as the author intended.
1265
1266 my $urls = '(' . join ('|', qw{
54310121 1267 http
1268 telnet
1269 mailto
1270 news
1271 gopher
1272 file
1273 wais
1274 ftp
1275 } )
1276 . ')';
1277
2a28b791 1278 my $ltrs = '\w';
1279 my $gunk = '/#~:.?+=&%@!\-';
1280 my $punc = '.:?\-';
1281 my $any = "${ltrs}${gunk}${punc}";
54310121 1282
2a28b791 1283 $rest =~ s{
54310121 1284 \b # start at word boundary
1285 ( # begin $1 {
29f227c9 1286 $urls : # need resource and a colon
1287 (?!:) # Ignore File::, among others.
54310121 1288 [$any] +? # followed by on or more
1289 # of any valid character, but
1290 # be conservative and take only
1291 # what you need to....
1292 ) # end $1 }
1293 (?= # look-ahead non-consumptive assertion
1294 [$punc]* # either 0 or more puntuation
1295 [^$any] # followed by a non-url char
1296 | # or else
1297 $ # then end of the string
1298 )
1299 }{<A HREF="$1">$1</A>}igox;
1300
2a28b791 1301 # text should be as it is (verbatim)
1302 $$text = $rest;
1303}
54310121 1304
54310121 1305
2a28b791 1306#
1307# pure text processing
1308#
1309# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1310# we don't want this to happen within IS
1311#
1312sub pure_text($){
1313 my $text = shift();
1314 process_puretext( $text, \$ptQuote, 1 );
54310121 1315}
1316
2a28b791 1317sub inIS_text($){
1318 my $text = shift();
1319 process_puretext( $text, \$ptQuote, 0 );
1320}
54310121 1321
1322#
1323# process_puretext - process pure text (without pod-escapes) converting
1324# double-quotes and handling implicit C<> links.
1325#
1326sub process_puretext {
2a28b791 1327 my($text, $quote, $notinIS) = @_;
54310121 1328
2a28b791 1329 ## Guessing at func() or [$@%&]*var references in plain text is destined
1330 ## to produce some strange looking ref's. uncomment to disable:
1331 ## $notinIS = 0;
1332
1333 my(@words, $lead, $trail);
54310121 1334
2a28b791 1335 # convert double-quotes to single-quotes
1336 if( $$quote && $text =~ s/"/''/s ){
1337 $$quote = 0;
1338 }
1339 while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1340 $$quote = 1 if $text =~ s/"/``/s;
54310121 1341
1342 # keep track of leading and trailing white-space
2a28b791 1343 $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
1344 $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
54310121 1345
2a28b791 1346 # split at space/non-space boundaries
1347 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
54310121 1348
1349 # process each word individually
1350 foreach my $word (@words) {
2a28b791 1351 # skip space runs
1352 next if $word =~ /^\s*$/;
54310121 1353 # see if we can infer a link
02369fa5 1354 if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {
54310121 1355 # has parenthesis so should have been a C<> ref
2a28b791 1356 ## try for a pagename (perlXXX(1))?
02369fa5 1357 my( $func, $args ) = ( $1, $2 );
1358 if( $args =~ /^\d+$/ ){
2a28b791 1359 my $url = page_sect( $word, '' );
1360 if( defined $url ){
1361 $word = "<A HREF=\"$url\">the $word manpage</A>";
1362 next;
1363 }
1364 }
02369fa5 1365 ## try function name for a link, append tt'ed argument list
1366 $word = emit_C( $func, '', "($args)");
2a28b791 1367
1368#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1369## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1370## # perl variables, should be a C<> ref
1371## $word = emit_C( $word );
1372
54310121 1373 } elsif ($word =~ m,^\w+://\w,) {
1374 # looks like a URL
5a039dd3 1375 # Don't relativize it: leave it as the author intended
54310121 1376 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1377 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1378 # looks like an e-mail address
7b8d334a 1379 my ($w1, $w2, $w3) = ("", $word, "");
1380 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1381 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1382 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1383 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1384 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1385 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1386 } else {
7b8d334a 1387 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1388 }
1389 }
1390
2a28b791 1391 # put everything back together
1392 return $lead . join( '', @words ) . $trail;
1393}
1394
54310121 1395
2a28b791 1396#
1397# process_text - handles plaintext that appears in the input pod file.
1398# there may be pod commands embedded within the text so those must be
1399# converted to html commands.
1400#
7ba65c74 1401
c68ea5d1 1402sub process_text1($$;$$);
1403sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
1404sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
7ba65c74 1405
2a28b791 1406sub process_text {
1407 return if $ignore;
1408 my( $tref ) = @_;
1409 my $res = process_text1( 0, $tref );
1410 $$tref = $res;
1411}
1412
c68ea5d1 1413sub process_text1($$;$$){
1414 my( $lev, $rstr, $func, $closing ) = @_;
2a28b791 1415 my $res = '';
1416
60a48b2d 1417 unless (defined $func) {
1418 $func = '';
1419 $lev++;
1420 }
1421
2a28b791 1422 if( $func eq 'B' ){
1423 # B<text> - boldface
1424 $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
1425
1426 } elsif( $func eq 'C' ){
1427 # C<code> - can be a ref or <CODE></CODE>
1428 # need to extract text
c68ea5d1 1429 my $par = go_ahead( $rstr, 'C', $closing );
2a28b791 1430
1431 ## clean-up of the link target
1432 my $text = depod( $par );
1433
1434 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1435 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1436
1437 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1438
1439 } elsif( $func eq 'E' ){
1440 # E<x> - convert to character
4b19af01 1441 $$rstr =~ s/^([^>]*)>//;
1442 my $escape = $1;
1443 $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1444 $res = "&$escape;";
2a28b791 1445
1446 } elsif( $func eq 'F' ){
1447 # F<filename> - italizice
1448 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1449
1450 } elsif( $func eq 'I' ){
1451 # I<text> - italizice
1452 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1453
1454 } elsif( $func eq 'L' ){
1455 # L<link> - link
1456 ## L<text|cross-ref> => produce text, use cross-ref for linking
1457 ## L<cross-ref> => make text from cross-ref
1458 ## need to extract text
c68ea5d1 1459 my $par = go_ahead( $rstr, 'L', $closing );
2a28b791 1460
1461 # some L<>'s that shouldn't be:
1462 # a) full-blown URL's are emitted as-is
1463 if( $par =~ m{^\w+://}s ){
1464 return make_URL_href( $par );
1465 }
1466 # b) C<...> is stripped and treated as C<>
1467 if( $par =~ /^C<(.*)>$/ ){
1468 my $text = depod( $1 );
1469 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1470 }
1471
1472 # analyze the contents
1473 $par =~ s/\n/ /g; # undo word-wrapped tags
1474 my $opar = $par;
1475 my $linktext;
1476 if( $par =~ s{^([^|]+)\|}{} ){
1477 $linktext = $1;
1478 }
1479
1480 # make sure sections start with a /
1481 $par =~ s{^"}{/"};
1482
1483 my( $page, $section, $ident );
1484
1485 # check for link patterns
1486 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
1487 # we've got a name/ident (no quotes)
1488 ( $page, $ident ) = ( $1, $2 );
1489 ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1490
1491 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1492 # even though this should be a "section", we go for ident first
1493 ( $page, $ident ) = ( $1, $2 );
1494 ### print STDERR "--> L<$par> to page $page, section $section\n";
1495
1496 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
1497 ( $page, $section ) = ( '', $par );
1498 ### print STDERR "--> L<$par> to void page, section $section\n";
1499
1500 } else {
1501 ( $page, $section ) = ( $par, '' );
1502 ### print STDERR "--> L<$par> to page $par, void section\n";
1503 }
1504
1505 # now, either $section or $ident is defined. the convoluted logic
1506 # below tries to resolve L<> according to what the user specified.
1507 # failing this, we try to find the next best thing...
1508 my( $url, $ltext, $fid );
1509
1510 RESOLVE: {
1511 if( defined $ident ){
1512 ## try to resolve $ident as an item
1513 ( $url, $fid ) = coderef( $page, $ident );
1514 if( $url ){
1515 if( ! defined( $linktext ) ){
1516 $linktext = $ident;
1517 $linktext .= " in " if $ident && $page;
1518 $linktext .= "the $page manpage" if $page;
1519 }
1520 ### print STDERR "got coderef url=$url\n";
1521 last RESOLVE;
1522 }
1523 ## no luck: go for a section (auto-quoting!)
1524 $section = $ident;
1525 }
1526 ## now go for a section
1527 my $htmlsection = htmlify( $section );
1528 $url = page_sect( $page, $htmlsection );
1529 if( $url ){
1530 if( ! defined( $linktext ) ){
1531 $linktext = $section;
1532 $linktext .= " in " if $section && $page;
1533 $linktext .= "the $page manpage" if $page;
1534 }
1535 ### print STDERR "got page/section url=$url\n";
1536 last RESOLVE;
1537 }
1538 ## no luck: go for an ident
1539 if( $section ){
1540 $ident = $section;
1541 } else {
1542 $ident = $page;
1543 $page = undef();
1544 }
1545 ( $url, $fid ) = coderef( $page, $ident );
1546 if( $url ){
1547 if( ! defined( $linktext ) ){
1548 $linktext = $ident;
1549 $linktext .= " in " if $ident && $page;
1550 $linktext .= "the $page manpage" if $page;
1551 }
1552 ### print STDERR "got section=>coderef url=$url\n";
1553 last RESOLVE;
1554 }
1555
1556 # warning; show some text.
1557 $linktext = $opar unless defined $linktext;
1558 warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1559 }
1560
1561 # now we have an URL or just plain code
1562 $$rstr = $linktext . '>' . $$rstr;
1563 if( defined( $url ) ){
1564 $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
1565 } else {
1566 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1567 }
1568
1569 } elsif( $func eq 'S' ){
1570 # S<text> - non-breaking spaces
1571 $res = process_text1( $lev, $rstr );
1572 $res =~ s/ /&nbsp;/g;
1573
1574 } elsif( $func eq 'X' ){
1575 # X<> - ignore
1576 $$rstr =~ s/^[^>]*>//;
1577
1578 } elsif( $func eq 'Z' ){
1579 # Z<> - empty
1580 warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1581 unless $$rstr =~ s/^>//;
1582
1583 } else {
c68ea5d1 1584 my $term = pattern $closing;
1585 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2a28b791 1586 # all others: either recurse into new function or
c68ea5d1 1587 # terminate at closing angle bracket(s)
2a28b791 1588 my $pt = $1;
c68ea5d1 1589 $pt .= $2 if !$3 && $lev == 1;
2a28b791 1590 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
c68ea5d1 1591 return $res if !$3 && $lev > 1;
1592 if( $3 ){
1593 $res .= process_text1( $lev, $rstr, $3, closing $4 );
1594 }
2a28b791 1595 }
1596 if( $lev == 1 ){
1597 $res .= pure_text( $$rstr );
54310121 1598 } else {
2a28b791 1599 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
54310121 1600 }
1601 }
2a28b791 1602 return $res;
1603}
54310121 1604
2a28b791 1605#
1606# go_ahead: extract text of an IS (can be nested)
1607#
c68ea5d1 1608sub go_ahead($$$){
1609 my( $rstr, $func, $closing ) = @_;
2a28b791 1610 my $res = '';
c68ea5d1 1611 my @closing = ($closing);
1612 while( $$rstr =~
1613 s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
2a28b791 1614 $res .= $1;
c68ea5d1 1615 unless( $3 ){
1616 shift @closing;
1617 return $res unless @closing;
2a28b791 1618 } else {
c68ea5d1 1619 unshift @closing, closing $4;
2a28b791 1620 }
1621 $res .= $2;
1622 }
1623 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1624 return $res;
54310121 1625}
1626
1627#
2a28b791 1628# emit_C - output result of C<text>
1629# $text is the depod-ed text
54310121 1630#
02369fa5 1631sub emit_C($;$$){
1632 my( $text, $nocode, $args ) = @_;
60a48b2d 1633 $args = '' unless defined $args;
2a28b791 1634 my $res;
1635 my( $url, $fid ) = coderef( undef(), $text );
1636
1637 # need HTML-safe text
02369fa5 1638 my $linktext = html_escape( "$text$args" );
2a28b791 1639
1640 if( defined( $url ) &&
1641 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1642 $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>";
1643 } elsif( 0 && $nocode ){
1644 $res = $linktext;
1645 } else {
1646 $res = "<CODE>$linktext</CODE>";
1647 }
1648 return $res;
54310121 1649}
1650
1651#
2a28b791 1652# html_escape: make text safe for HTML
1653#
1654sub html_escape {
1655 my $rest = $_[0];
1656 $rest =~ s/&/&amp;/g;
1657 $rest =~ s/</&lt;/g;
1658 $rest =~ s/>/&gt;/g;
1659 $rest =~ s/"/&quot;/g;
1660 return $rest;
1661}
1662
1663
1664#
39e571d4 1665# dosify - convert filenames to 8.3
1666#
1667sub dosify {
1668 my($str) = @_;
fe4c6be1 1669 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4 1670 if ($Is83) {
1671 $str = lc $str;
1672 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1673 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1674 }
1675 return $str;
1676}
1677
1678#
2a28b791 1679# page_sect - make an URL from the text of a L<>
54310121 1680#
2a28b791 1681sub page_sect($$) {
1682 my( $page, $section ) = @_;
1683 my( $linktext, $page83, $link); # work strings
1684
1685 # check if we know that this is a section in this page
1686 if (!defined $pages{$page} && defined $sections{$page}) {
1687 $section = $page;
1688 $page = "";
1689 ### print STDERR "reset page='', section=$section\n";
54310121 1690 }
1691
39e571d4 1692 $page83=dosify($page);
1693 $page=$page83 if (defined $pages{$page83});
54310121 1694 if ($page eq "") {
2a28b791 1695 $link = "#" . htmlify( $section );
350ccacd 1696 } elsif ( $page =~ /::/ ) {
350ccacd 1697 $page =~ s,::,/,g;
29f227c9 1698 # Search page cache for an entry keyed under the html page name,
1699 # then look to see what directory that page might be in. NOTE:
1700 # this will only find one page. A better solution might be to produce
1701 # an intermediate page that is an index to all such pages.
1702 my $page_name = $page ;
fe6f1558 1703 $page_name =~ s,^.*/,,s ;
29f227c9 1704 if ( defined( $pages{ $page_name } ) &&
1705 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1706 ) {
1707 $page = $1 ;
1708 }
1709 else {
1710 # NOTE: This branch assumes that all A::B pages are located in
1711 # $htmlroot/A/B.html . This is often incorrect, since they are
1712 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1713 # analyze the contents of %pages and figure out where any
1714 # cousins of A::B are, then assume that. So, if A::B isn't found,
1715 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1716 # lib/A/B.pm. This is also limited, but it's an improvement.
1717 # Maybe a hints file so that the links point to the correct places
2a28b791 1718 # nonetheless?
1719
29f227c9 1720 }
350ccacd 1721 $link = "$htmlroot/$page.html";
2a28b791 1722 $link .= "#" . htmlify( $section ) if ($section);
54310121 1723 } elsif (!defined $pages{$page}) {
54310121 1724 $link = "";
54310121 1725 } else {
2a28b791 1726 $section = htmlify( $section ) if $section ne "";
1727 ### print STDERR "...section=$section\n";
54310121 1728
1729 # if there is a directory by the name of the page, then assume that an
1730 # appropriate section will exist in the subdirectory
29f227c9 1731# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1732 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 1733 $link = "$htmlroot/$1/$section.html";
2a28b791 1734 ### print STDERR "...link=$link\n";
54310121 1735
1736 # since there is no directory by the name of the page, the section will
1737 # have to exist within a .html of the same name. thus, make sure there
1738 # is a .pod or .pm that might become that .html
1739 } else {
2a28b791 1740 $section = "#$section" if $section;
1741 ### print STDERR "...section=$section\n";
1742
54310121 1743 # check if there is a .pod with the page name
1744 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1745 $link = "$htmlroot/$1.html$section";
1746 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1747 $link = "$htmlroot/$1.html$section";
1748 } else {
54310121 1749 $link = "";
54310121 1750 }
1751 }
1752 }
1753
54310121 1754 if ($link) {
29f227c9 1755 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1756 # implies $htmlroot eq ''. This means that the link in question
1757 # needs a prefix of $htmldir if it begins with '/'. The test for
1758 # the initial '/' is done to avoid '#'-only links, and to allow
1759 # for other kinds of links, like file:, ftp:, etc.
1760 my $url ;
1761 if ( $htmlfileurl ne '' ) {
fe6f1558 1762 $link = "$htmldir$link" if $link =~ m{^/}s;
2a28b791 1763 $url = relativize_url( $link, $htmlfileurl );
1764# print( " b: [$link,$htmlfileurl,$url]\n" );
29f227c9 1765 }
1766 else {
1767 $url = $link ;
1768 }
2a28b791 1769 return $url;
29f227c9 1770
54310121 1771 } else {
2a28b791 1772 return undef();
54310121 1773 }
54310121 1774}
1775
1776#
29f227c9 1777# relativize_url - convert an absolute URL to one relative to a base URL.
1778# Assumes both end in a filename.
1779#
1780sub relativize_url {
1781 my ($dest,$source) = @_ ;
1782
1783 my ($dest_volume,$dest_directory,$dest_file) =
1784 File::Spec::Unix->splitpath( $dest ) ;
1785 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1786
1787 my ($source_volume,$source_directory,$source_file) =
1788 File::Spec::Unix->splitpath( $source ) ;
1789 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1790
1791 my $rel_path = '' ;
1792 if ( $dest ne '' ) {
1793 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1794 }
1795
1796 if ( $rel_path ne '' &&
1797 substr( $rel_path, -1 ) ne '/' &&
1798 substr( $dest_file, 0, 1 ) ne '#'
1799 ) {
1800 $rel_path .= "/$dest_file" ;
1801 }
1802 else {
1803 $rel_path .= "$dest_file" ;
1804 }
1805
1806 return $rel_path ;
1807}
1808
54310121 1809
1810#
2a28b791 1811# coderef - make URL from the text of a C<>
54310121 1812#
2a28b791 1813sub coderef($$){
1814 my( $page, $item ) = @_;
1815 my( $url );
1816
1817 my $fid = fragment_id( $item );
2a28b791 1818 if( defined( $page ) ){
1819 # we have been given a $page...
1820 $page =~ s{::}{/}g;
1821
1822 # Do we take it? Item could be a section!
228a48a5 1823 my $base = $items{$fid} || "";
2a28b791 1824 $base =~ s{[^/]*/}{};
1825 if( $base ne "$page.html" ){
1826 ### print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1827 $page = undef();
1828 }
54310121 1829
2a28b791 1830 } else {
1831 # no page - local items precede cached items
67398a75 1832 if( defined( $fid ) ){
1833 if( exists $local_items{$fid} ){
1834 $page = $local_items{$fid};
1835 } else {
1836 $page = $items{$fid};
1837 }
2a28b791 1838 }
1839 }
54310121 1840
1841 # if there was a pod file that we found earlier with an appropriate
1842 # =item directive, then create a link to that page.
2a28b791 1843 if( defined $page ){
1844 if( $page ){
228a48a5 1845 if( exists $pages{$page} and $pages{$page} =~ /([^:.]*)\.[^:]*:/){
2a28b791 1846 $page = $1 . '.html';
29f227c9 1847 }
2a28b791 1848 my $link = "$htmlroot/$page#item_$fid";
54310121 1849
2a28b791 1850 # Here, we take advantage of the knowledge that $htmlfileurl
1851 # ne '' implies $htmlroot eq ''.
1852 if ( $htmlfileurl ne '' ) {
1853 $link = "$htmldir$link" ;
1854 $url = relativize_url( $link, $htmlfileurl ) ;
1855 } else {
1856 $url = $link ;
1857 }
1858 } else {
1859 $url = "#item_" . $fid;
1860 }
54310121 1861
2a28b791 1862 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1863 }
1864 return( $url, $fid );
54310121 1865}
1866
54310121 1867
1868
1869#
29f227c9 1870# Adapted from Nick Ing-Simmons' PodToHtml package.
1871sub relative_url {
1872 my $source_file = shift ;
1873 my $destination_file = shift;
1874
1875 my $source = URI::file->new_abs($source_file);
1876 my $uo = URI::file->new($destination_file,$source)->abs;
1877 return $uo->rel->as_string;
1878}
1879
1880
1881#
54310121 1882# finish_list - finish off any pending HTML lists. this should be called
1883# after the entire pod file has been read and converted.
1884#
1885sub finish_list {
7b8d334a 1886 while ($listlevel > 0) {
54310121 1887 print HTML "</DL>\n";
1888 $listlevel--;
1889 }
1890}
1891
1892#
1893# htmlify - converts a pod section specification to a suitable section
2a28b791 1894# specification for HTML. Note that we keep spaces and special characters
1895# except ", ? (Netscape problem) and the hyphen (writer's problem...).
54310121 1896#
1897sub htmlify {
2a28b791 1898 my( $heading) = @_;
1899 $heading =~ s/(\s+)/ /g;
1900 $heading =~ s/\s+\Z//;
1901 $heading =~ s/\A\s+//;
1902 # The hyphen is a disgrace to the English language.
1903 $heading =~ s/[-"?]//g;
1904 $heading = lc( $heading );
1905 return $heading;
1906}
54310121 1907
2a28b791 1908#
1909# depod - convert text by eliminating all interior sequences
1910# Note: can be called with copy or modify semantics
1911#
1912my %E2c;
67398a75 1913$E2c{lt} = '<';
1914$E2c{gt} = '>';
1915$E2c{sol} = '/';
2a28b791 1916$E2c{verbar} = '|';
67398a75 1917$E2c{amp} = '&'; # in Tk's pods
2a28b791 1918
c68ea5d1 1919sub depod1($;$$);
7ba65c74 1920
2a28b791 1921sub depod($){
1922 my $string;
1923 if( ref( $_[0] ) ){
1924 $string = ${$_[0]};
1925 ${$_[0]} = depod1( \$string );
1926 } else {
1927 $string = $_[0];
1928 depod1( \$string );
1929 }
1930}
54310121 1931
c68ea5d1 1932sub depod1($;$$){
1933 my( $rstr, $func, $closing ) = @_;
2a28b791 1934 my $res = '';
228a48a5 1935 return $res unless defined $$rstr;
2a28b791 1936 if( ! defined( $func ) ){
1937 # skip to next begin of an interior sequence
c68ea5d1 1938 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?// ){
2a28b791 1939 # recurse into its text
c68ea5d1 1940 $res .= $1 . depod1( $rstr, $2, closing $3);
2a28b791 1941 }
1942 $res .= $$rstr;
1943 } elsif( $func eq 'E' ){
1944 # E<x> - convert to character
4b19af01 1945 $$rstr =~ s/^([^>]*)>//;
228a48a5 1946 $res .= $E2c{$1} || "";
2a28b791 1947 } elsif( $func eq 'X' ){
1948 # X<> - ignore
1949 $$rstr =~ s/^[^>]*>//;
1950 } elsif( $func eq 'Z' ){
1951 # Z<> - empty
1952 $$rstr =~ s/^>//;
1953 } else {
1954 # all others: either recurse into new function or
1955 # terminate at closing angle bracket
c68ea5d1 1956 my $term = pattern $closing;
1957 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)// ){
2a28b791 1958 $res .= $1;
c68ea5d1 1959 last unless $3;
1960 $res .= depod1( $rstr, $3, closing $4 );
2a28b791 1961 }
1962 ## If we're here and $2 ne '>': undelimited interior sequence.
1963 ## Ignored, as this is called without proper indication of where we are.
1964 ## Rely on process_text to produce diagnostics.
1965 }
1966 return $res;
1967}
54310121 1968
2a28b791 1969#
1970# fragment_id - construct a fragment identifier from:
1971# a) =item text
1972# b) contents of C<...>
1973#
1974my @hc;
1975sub fragment_id {
1976 my $text = shift();
1977 $text =~ s/\s+\Z//s;
1978 if( $text ){
1979 # a method or function?
1980 return $1 if $text =~ /(\w+)\s*\(/;
1981 return $1 if $text =~ /->\s*(\w+)\s*\(?/;
1982
1983 # a variable name?
1984 return $1 if $text =~ /^([$@%*]\S+)/;
1985
1986 # some pattern matching operator?
1987 return $1 if $text =~ m|^(\w+/).*/\w*$|;
1988
1989 # fancy stuff... like "do { }"
1990 return $1 if $text =~ m|^(\w+)\s*{.*}$|;
1991
1992 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
1993 # and some funnies with ... Module ...
1994 return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
1995 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
1996
1997 # text? normalize!
1998 $text =~ s/\s+/_/sg;
1999 $text =~ s{(\W)}{
2000 defined( $hc[ord($1)] ) ? $hc[ord($1)]
2001 : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
2002 $text = substr( $text, 0, 50 );
2003 } else {
2004 return undef();
2005 }
54310121 2006}
2007
2a28b791 2008#
2009# make_URL_href - generate HTML href from URL
2010# Special treatment for CGI queries.
2011#
2012sub make_URL_href($){
2013 my( $url ) = @_;
2014 if( $url !~
228a48a5 2015 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<A HREF="$1$2">$1</A>}i ){
2a28b791 2016 $url = "<A HREF=\"$url\">$url</A>";
2017 }
2018 return $url;
54310121 2019}
2020
20211;