SYN SYN
[p5sagit/p5-mst-13.2.git] / lib / Pod / Html.pm
CommitLineData
54310121 1package Pod::Html;
0e4548d5 2use strict;
54310121 3require Exporter;
0e4548d5 4
5use vars qw($VERSION @ISA @EXPORT);
2a28b791 6$VERSION = 1.03;
0e4548d5 7@ISA = qw(Exporter);
54310121 8@EXPORT = qw(pod2html htmlify);
54310121 9
10use Carp;
0e4548d5 11use Config;
12use Cwd;
13use File::Spec::Unix;
14use Getopt::Long;
15use Pod::Functions;
54310121 16
3ec07288 17use locale; # make \w work right in non-ASCII lands
18
54310121 19=head1 NAME
20
7b8d334a 21Pod::Html - module to convert pod files to HTML
54310121 22
23=head1 SYNOPSIS
24
7b8d334a 25 use Pod::Html;
54310121 26 pod2html([options]);
27
28=head1 DESCRIPTION
29
30Converts files from pod format (see L<perlpod>) to HTML format. It
31can automatically generate indexes and cross-references, and it keeps
32a cache of things it knows how to cross-reference.
33
34=head1 ARGUMENTS
35
36Pod::Html takes the following arguments:
37
38=over 4
39
0e4548d5 40=item backlink
41
42 --backlink="Back to Top"
43
44Adds "Back to Top" links in front of every HEAD1 heading (except for
45the first). By default, no backlink are being generated.
46
47=item css
48
49 --css=stylesheet
50
51Specify the URL of a cascading style sheet.
52
53=item flush
54
55 --flush
56
57Flushes the item and directory caches.
58
59=item header
60
61 --header
62 --noheader
63
64Creates header and footer blocks containing the text of the NAME
65section. By default, no headers are being generated.
66
54310121 67=item help
68
69 --help
70
71Displays the usage message.
72
5a039dd3 73=item htmldir
74
75 --htmldir=name
76
77Sets the directory in which the resulting HTML file is placed. This
29f227c9 78is used to generate relative links to other files. Not passing this
79causes all links to be absolute, since this is the value that tells
80Pod::Html the root of the documentation tree.
5a039dd3 81
54310121 82=item htmlroot
83
84 --htmlroot=name
85
86Sets the base URL for the HTML files. When cross-references are made,
87the HTML root is prepended to the URL.
88
0e4548d5 89=item index
90
91 --index
92 --noindex
93
94Generate an index at the top of the HTML file. This is the default
95behaviour.
96
54310121 97=item infile
98
99 --infile=name
100
101Specify the pod file to convert. Input is taken from STDIN if no
102infile is specified.
103
54310121 104=item libpods
105
106 --libpods=name:...:name
107
108List of page names (eg, "perlfunc") which contain linkable C<=item>s.
109
110=item netscape
111
112 --netscape
0e4548d5 113 --nonetscape
54310121 114
0e4548d5 115Use Netscape HTML directives when applicable. By default, they will
116B<not> be used.
54310121 117
0e4548d5 118=item outfile
54310121 119
0e4548d5 120 --outfile=name
54310121 121
0e4548d5 122Specify the HTML file to create. Output goes to STDOUT if no outfile
123is specified.
54310121 124
0e4548d5 125=item podpath
54310121 126
0e4548d5 127 --podpath=name:...:name
54310121 128
0e4548d5 129Specify which subdirectories of the podroot contain pod files whose
130HTML converted forms can be linked-to in cross-references.
54310121 131
0e4548d5 132=item podroot
54310121 133
0e4548d5 134 --podroot=name
54310121 135
0e4548d5 136Specify the base directory for finding library pods.
54310121 137
0e4548d5 138=item quiet
54310121 139
0e4548d5 140 --quiet
141 --noquiet
54310121 142
0e4548d5 143Don't display I<mostly harmless> warning messages. These messages
144will be displayed by default. But this is not the same as C<verbose>
145mode.
54310121 146
0e4548d5 147=item recurse
54310121 148
0e4548d5 149 --recurse
54310121 150 --norecurse
151
0e4548d5 152Recurse into subdirectories specified in podpath (default behaviour).
54310121 153
154=item title
155
156 --title=title
157
158Specify the title of the resulting HTML file.
159
160=item verbose
161
162 --verbose
0e4548d5 163 --noverbose
54310121 164
0e4548d5 165Display progress messages. By default, they won't be displayed.
34db337b 166
54310121 167=back
168
169=head1 EXAMPLE
170
171 pod2html("pod2html",
172 "--podpath=lib:ext:pod:vms",
173 "--podroot=/usr/src/perl",
174 "--htmlroot=/perl/nmanual",
175 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
176 "--recurse",
177 "--infile=foo.pod",
178 "--outfile=/perl/nmanual/foo.html");
179
34db337b 180=head1 ENVIRONMENT
181
182Uses $Config{pod2html} to setup default options.
183
54310121 184=head1 AUTHOR
185
186Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
187
54310121 188=head1 SEE ALSO
189
190L<perlpod>
191
192=head1 COPYRIGHT
193
194This program is distributed under the Artistic License.
195
196=cut
197
1f763251 198my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
199my $dircache = "pod2htmd$cache_ext";
200my $itemcache = "pod2htmi$cache_ext";
54310121 201
202my @begin_stack = (); # begin/end stack
203
29f227c9 204my @libpods = (); # files to search for links from C<> directives
205my $htmlroot = "/"; # http-server base directory from which all
54310121 206 # relative paths in $podpath stem.
5a039dd3 207my $htmldir = ""; # The directory to which the html pages
208 # will (eventually) be written.
54310121 209my $htmlfile = ""; # write to stdout by default
29f227c9 210my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3 211 # refer to this file. This is only used
212 # to make relative urls that point to
213 # other files.
54310121 214my $podfile = ""; # read from stdin by default
215my @podpath = (); # list of directories containing library pods.
216my $podroot = "."; # filesystem base directory from which all
217 # relative paths in $podpath stem.
34db337b 218my $css = ''; # Cascading style sheet
54310121 219my $recurse = 1; # recurse on subdirectories in $podpath.
34db337b 220my $quiet = 0; # not quiet by default
54310121 221my $verbose = 0; # not verbose by default
222my $doindex = 1; # non-zero if we should generate an index
0e4548d5 223my $backlink = ''; # text for "back to top" links
54310121 224my $listlevel = 0; # current list depth
2a28b791 225my @listend = (); # the text to use to end the list.
226my $after_lpar = 0; # set to true after a par in an =item
54310121 227my $ignore = 1; # whether or not to format text. we don't
228 # format text until we hit our first pod
229 # directive.
230
231my %items_named = (); # for the multiples of the same item in perlfunc
232my @items_seen = ();
233my $netscape = 0; # whether or not to use netscape directives.
234my $title; # title to give the pod(s)
34db337b 235my $header = 0; # produce block header/footer
54310121 236my $top = 1; # true if we are at the top of the doc. used
237 # to prevent the first <HR> directive.
238my $paragraph; # which paragraph we're processing (used
239 # for error messages)
2a28b791 240my $ptQuote = 0; # status of double-quote conversion
54310121 241my %pages = (); # associative array used to find the location
242 # of pages referenced by L<> links.
243my %sections = (); # sections within this page
244my %items = (); # associative array used to find the location
245 # of =item directives referenced by C<> links
2a28b791 246my %local_items = (); # local items - avoid destruction of %items
39e571d4 247my $Is83; # is dos with short filenames (8.3)
248
54310121 249sub init_globals {
04f720db 250$dircache = "pod2htmd$cache_ext";
251$itemcache = "pod2htmi$cache_ext";
54310121 252
253@begin_stack = (); # begin/end stack
254
255@libpods = (); # files to search for links from C<> directives
256$htmlroot = "/"; # http-server base directory from which all
257 # relative paths in $podpath stem.
67398a75 258$htmldir = ""; # The directory to which the html pages
259 # will (eventually) be written.
54310121 260$htmlfile = ""; # write to stdout by default
261$podfile = ""; # read from stdin by default
262@podpath = (); # list of directories containing library pods.
263$podroot = "."; # filesystem base directory from which all
264 # relative paths in $podpath stem.
34db337b 265$css = ''; # Cascading style sheet
54310121 266$recurse = 1; # recurse on subdirectories in $podpath.
34db337b 267$quiet = 0; # not quiet by default
54310121 268$verbose = 0; # not verbose by default
269$doindex = 1; # non-zero if we should generate an index
0e4548d5 270$backlink = ''; # text for "back to top" links
54310121 271$listlevel = 0; # current list depth
2a28b791 272@listend = (); # the text to use to end the list.
273$after_lpar = 0; # set to true after a par in an =item
54310121 274$ignore = 1; # whether or not to format text. we don't
275 # format text until we hit our first pod
276 # directive.
277
278@items_seen = ();
279%items_named = ();
280$netscape = 0; # whether or not to use netscape directives.
34db337b 281$header = 0; # produce block header/footer
54310121 282$title = ''; # title to give the pod(s)
283$top = 1; # true if we are at the top of the doc. used
284 # to prevent the first <HR> directive.
285$paragraph = ''; # which paragraph we're processing (used
286 # for error messages)
54310121 287%sections = (); # sections within this page
3e3baf6d 288
289# These are not reinitialised here but are kept as a cache.
290# See get_cache and related cache management code.
291#%pages = (); # associative array used to find the location
292 # of pages referenced by L<> links.
293#%items = (); # associative array used to find the location
54310121 294 # of =item directives referenced by C<> links
2a28b791 295%local_items = ();
39e571d4 296$Is83=$^O eq 'dos';
54310121 297}
298
2a28b791 299#
300# clean_data: global clean-up of pod data
301#
302sub clean_data($){
303 my( $dataref ) = @_;
304 my $i;
305 for( $i = 0; $i <= $#$dataref; $i++ ){
306 ${$dataref}[$i] =~ s/\s+\Z//;
307
308 # have a look for all-space lines
309 if( ${$dataref}[$i] =~ /^\s+$/m ){
310 my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
311 splice( @$dataref, $i, 1, @chunks );
312 }
313 }
314}
315
316
54310121 317sub pod2html {
318 local(@ARGV) = @_;
319 local($/);
320 local $_;
321
322 init_globals();
323
39e571d4 324 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
325
54310121 326 # cache of %pages and %items from last time we ran pod2html
54310121 327
328 #undef $opt_help if defined $opt_help;
329
330 # parse the command-line parameters
331 parse_command_line();
332
333 # set some variables to their default values if necessary
334 local *POD;
335 unless (@ARGV && $ARGV[0]) {
336 $podfile = "-" unless $podfile; # stdin
337 open(POD, "<$podfile")
338 || die "$0: cannot open $podfile file for input: $!\n";
339 } else {
340 $podfile = $ARGV[0]; # XXX: might be more filenames
341 *POD = *ARGV;
342 }
343 $htmlfile = "-" unless $htmlfile; # stdout
344 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
fe6f1558 345 $htmldir =~ s#/\z## ; # so we don't get a //
29f227c9 346 if ( $htmlroot eq ''
347 && defined( $htmldir )
348 && $htmldir ne ''
349 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
350 )
5a039dd3 351 {
29f227c9 352 # Set the 'base' url for this file, so that we can use it
353 # as the location from which to calculate relative links
354 # to other files. If this is '', then absolute links will
355 # be used throughout.
356 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 357 }
54310121 358
359 # read the pod a paragraph at a time
360 warn "Scanning for sections in input file(s)\n" if $verbose;
361 $/ = "";
362 my @poddata = <POD>;
363 close(POD);
2a28b791 364 clean_data( \@poddata );
54310121 365
366 # scan the pod for =head[1-6] directives and build an index
367 my $index = scan_headings(\%sections, @poddata);
368
3e3baf6d 369 unless($index) {
31e56455 370 warn "No headings in $podfile\n" if $verbose;
3e3baf6d 371 }
372
54310121 373 # open the output file
374 open(HTML, ">$htmlfile")
375 || die "$0: cannot open $htmlfile file for output: $!\n";
376
d011ffae 377 # put a title in the HTML file if one wasn't specified
378 if ($title eq '') {
379 TITLE_SEARCH: {
380 for (my $i = 0; $i < @poddata; $i++) {
381 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
382 for my $para ( @poddata[$i, $i+1] ) {
383 last TITLE_SEARCH
384 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
385 }
386 }
54310121 387
d011ffae 388 }
389 }
390 }
fe6f1558 391 if (!$title and $podfile =~ /\.pod\z/) {
3e3baf6d 392 # probably a split pod so take first =head[12] as title
393 for (my $i = 0; $i < @poddata; $i++) {
394 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
395 }
396 warn "adopted '$title' as title for $podfile\n"
397 if $verbose and $title;
398 }
7b8d334a 399 if ($title) {
400 $title =~ s/\s*\(.*\)//;
401 } else {
34db337b 402 warn "$0: no title for $podfile" unless $quiet;
fe6f1558 403 $podfile =~ /^(.*)(\.[^.\/]+)?\z/s;
54310121 404 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 405 warn "using $title" if $verbose;
54310121 406 }
34db337b 407 my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
408 $csslink =~ s,\\,/,g;
409 $csslink =~ s,(/.):,$1|,;
410
411 my $block = $header ? <<END_OF_BLOCK : '';
412<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
413<TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
414<FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
415</TD></TR>
416</TABLE>
417END_OF_BLOCK
418
54310121 419 print HTML <<END_OF_HEAD;
7b8d334a 420<HTML>
421<HEAD>
34db337b 422<TITLE>$title</TITLE>$csslink
7b8d334a 423<LINK REV="made" HREF="mailto:$Config{perladmin}">
424</HEAD>
54310121 425
7b8d334a 426<BODY>
34db337b 427$block
54310121 428END_OF_HEAD
429
3e3baf6d 430 # load/reload/validate/cache %pages and %items
431 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121 432
433 # scan the pod for =item directives
2a28b791 434 scan_items( \%local_items, "", @poddata);
54310121 435
436 # put an index at the top of the file. note, if $doindex is 0 we
437 # still generate an index, but surround it with an html comment.
438 # that way some other program can extract it if desired.
439 $index =~ s/--+/-/g;
2a28b791 440 print HTML "<A NAME=\"__index__\"></A>\n";
54310121 441 print HTML "<!-- INDEX BEGIN -->\n";
442 print HTML "<!--\n" unless $doindex;
443 print HTML $index;
444 print HTML "-->\n" unless $doindex;
445 print HTML "<!-- INDEX END -->\n\n";
31e56455 446 print HTML "<HR>\n" if $doindex and $index;
54310121 447
448 # now convert this file
2a28b791 449 my $after_item; # set to true after an =item
450 warn "Converting input file $podfile\n" if $verbose;
451 foreach my $i (0..$#poddata){
452 $ptQuote = 0; # status of quote conversion
453
54310121 454 $_ = $poddata[$i];
455 $paragraph = $i+1;
456 if (/^(=.*)/s) { # is it a pod directive?
457 $ignore = 0;
2a28b791 458 $after_item = 0;
54310121 459 $_ = $1;
460 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
461 process_begin($1, $2);
462 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
463 process_end($1, $2);
464 } elsif (/^=cut/) { # =cut
465 process_cut();
466 } elsif (/^=pod/) { # =pod
467 process_pod();
468 } else {
469 next if @begin_stack && $begin_stack[-1] ne 'html';
470
7b8d334a 471 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
2a28b791 472 process_head( $1, $2, $doindex && $index );
473 } elsif (/^=item\s*(.*\S)?/sm) { # =item text
474 warn "$0: $podfile: =item without bullet, number or text"
228a48a5 475 . " in paragraph $paragraph.\n" if !defined($1) or $1 eq '';
2a28b791 476 process_item( $1 );
477 $after_item = 1;
54310121 478 } elsif (/^=over\s*(.*)/) { # =over N
479 process_over();
480 } elsif (/^=back/) { # =back
481 process_back();
a45bd81d 482 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
54310121 483 process_for($1,$2);
484 } else {
485 /^=(\S*)\s*/;
486 warn "$0: $podfile: unknown pod directive '$1' in "
487 . "paragraph $paragraph. ignoring.\n";
488 }
489 }
490 $top = 0;
491 }
492 else {
493 next if $ignore;
494 next if @begin_stack && $begin_stack[-1] ne 'html';
495 my $text = $_;
2a28b791 496 if( $text =~ /\A\s+/ ){
497 process_pre( \$text );
498 print HTML "<PRE>\n$text</PRE>\n";
499
500 } else {
501 process_text( \$text );
502
503 # experimental: check for a paragraph where all lines
504 # have some ...\t...\t...\n pattern
505 if( $text =~ /\t/ ){
506 my @lines = split( "\n", $text );
507 if( @lines > 1 ){
508 my $all = 2;
509 foreach my $line ( @lines ){
510 if( $line =~ /\S/ && $line !~ /\t/ ){
511 $all--;
512 last if $all == 0;
513 }
514 }
515 if( $all > 0 ){
516 $text =~ s/\t+/<TD>/g;
517 $text =~ s/^/<TR><TD>/gm;
518 $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
519 $text . '</TABLE>';
520 }
521 }
522 }
523 ## end of experimental
524
525 if( $after_item ){
526 print HTML "$text\n";
527 $after_lpar = 1;
528 } else {
529 print HTML "<P>$text</P>\n";
530 }
531 }
532 $after_item = 0;
54310121 533 }
534 }
535
536 # finish off any pending directives
537 finish_list();
2a28b791 538
539 # link to page index
0e4548d5 540 print HTML "<P><A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A></P>\n"
541 if $doindex and $index and $backlink;
2a28b791 542
54310121 543 print HTML <<END_OF_TAIL;
34db337b 544$block
7b8d334a 545</BODY>
54310121 546
7b8d334a 547</HTML>
54310121 548END_OF_TAIL
549
550 # close the html file
551 close(HTML);
552
553 warn "Finished\n" if $verbose;
554}
555
556##############################################################################
557
558my $usage; # see below
559sub usage {
560 my $podfile = shift;
561 warn "$0: $podfile: @_\n" if @_;
562 die $usage;
563}
564
565$usage =<<END_OF_USAGE;
566Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
567 --podpath=<name>:...:<name> --podroot=<name>
568 --libpods=<name>:...:<name> --recurse --verbose --index
569 --netscape --norecurse --noindex
570
0e4548d5 571 --backlink - set text for "back to top" links (default: none).
572 --css - stylesheet URL
573 --flush - flushes the item and directory caches.
574 --[no]header - produce block header/footer (default is no headers).
575 --help - prints this message.
576 --htmldir - directory for resulting HTML files.
577 --htmlroot - http-server base directory from which all relative paths
578 in podpath stem (default is /).
579 --[no]index - generate an index at the top of the resulting html
580 (default behaviour).
581 --infile - filename for the pod to convert (input taken from stdin
582 by default).
583 --libpods - colon-separated list of pages to search for =item pod
584 directives in as targets of C<> and implicit links (empty
585 by default). note, these are not filenames, but rather
586 page names like those that appear in L<> links.
587 --[no]netscape - will use netscape html directives when applicable.
588 (default is not to use them).
589 --outfile - filename for the resulting html file (output sent to
590 stdout by default).
591 --podpath - colon-separated list of directories containing library
592 pods (empty by default).
593 --podroot - filesystem base directory from which all relative paths
594 in podpath stem (default is .).
595 --[no]quiet - supress some benign warning messages (default is off).
596 --[no]recurse - recurse on those subdirectories listed in podpath
597 (default behaviour).
598 --title - title that will appear in resulting html file.
599 --[no]verbose - self-explanatory (off by default).
54310121 600
601END_OF_USAGE
602
603sub parse_command_line {
0e4548d5 604 my ($opt_backlink,$opt_css,$opt_flush,$opt_header,$opt_help,$opt_htmldir,
605 $opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,
606 $opt_outfile,$opt_podpath,$opt_podroot,$opt_quiet,$opt_recurse,
607 $opt_title,$opt_verbose);
608
34db337b 609 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 610 my $result = GetOptions(
0e4548d5 611 'backlink=s' => \$opt_backlink,
612 'css=s' => \$opt_css,
29f227c9 613 'flush' => \$opt_flush,
0e4548d5 614 'header!' => \$opt_header,
29f227c9 615 'help' => \$opt_help,
616 'htmldir=s' => \$opt_htmldir,
54310121 617 'htmlroot=s' => \$opt_htmlroot,
29f227c9 618 'index!' => \$opt_index,
54310121 619 'infile=s' => \$opt_infile,
620 'libpods=s' => \$opt_libpods,
621 'netscape!' => \$opt_netscape,
622 'outfile=s' => \$opt_outfile,
623 'podpath=s' => \$opt_podpath,
624 'podroot=s' => \$opt_podroot,
0e4548d5 625 'quiet!' => \$opt_quiet,
54310121 626 'recurse!' => \$opt_recurse,
627 'title=s' => \$opt_title,
0e4548d5 628 'verbose!' => \$opt_verbose,
54310121 629 );
630 usage("-", "invalid parameters") if not $result;
631
632 usage("-") if defined $opt_help; # see if the user asked for help
633 $opt_help = ""; # just to make -w shut-up.
634
54310121 635 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
636 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
637
0e4548d5 638 $backlink = $opt_backlink if defined $opt_backlink;
639 $css = $opt_css if defined $opt_css;
640 $header = $opt_header if defined $opt_header;
641 $htmldir = $opt_htmldir if defined $opt_htmldir;
642 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
643 $doindex = $opt_index if defined $opt_index;
644 $podfile = $opt_infile if defined $opt_infile;
645 $netscape = $opt_netscape if defined $opt_netscape;
646 $htmlfile = $opt_outfile if defined $opt_outfile;
647 $podroot = $opt_podroot if defined $opt_podroot;
648 $quiet = $opt_quiet if defined $opt_quiet;
649 $recurse = $opt_recurse if defined $opt_recurse;
650 $title = $opt_title if defined $opt_title;
651 $verbose = $opt_verbose if defined $opt_verbose;
652
54310121 653 warn "Flushing item and directory caches\n"
654 if $opt_verbose && defined $opt_flush;
655 unlink($dircache, $itemcache) if defined $opt_flush;
54310121 656}
657
3e3baf6d 658
659my $saved_cache_key;
660
661sub get_cache {
662 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
663 my @cache_key_args = @_;
664
665 # A first-level cache:
666 # Don't bother reading the cache files if they still apply
667 # and haven't changed since we last read them.
668
669 my $this_cache_key = cache_key(@cache_key_args);
670
671 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
672
673 # load the cache of %pages and %items if possible. $tests will be
674 # non-zero if successful.
675 my $tests = 0;
676 if (-f $dircache && -f $itemcache) {
677 warn "scanning for item cache\n" if $verbose;
678 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
679 }
680
681 # if we didn't succeed in loading the cache then we must (re)build
682 # %pages and %items.
683 if (!$tests) {
684 warn "scanning directories in pod-path\n" if $verbose;
685 scan_podpath($podroot, $recurse, 0);
686 }
687 $saved_cache_key = cache_key(@cache_key_args);
688}
689
690sub cache_key {
691 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
692 return join('!', $dircache, $itemcache, $recurse,
29f227c9 693 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d 694}
695
54310121 696#
3e3baf6d 697# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121 698# are valid caches of %pages and %items. if they are valid then it loads
699# them and returns a non-zero value.
700#
3e3baf6d 701sub load_cache {
54310121 702 my($dircache, $itemcache, $podpath, $podroot) = @_;
703 my($tests);
704 local $_;
705
706 $tests = 0;
707
708 open(CACHE, "<$itemcache") ||
709 die "$0: error opening $itemcache for reading: $!\n";
710 $/ = "\n";
711
712 # is it the same podpath?
713 $_ = <CACHE>;
714 chomp($_);
3e3baf6d 715 $tests++ if (join(":", @$podpath) eq $_);
54310121 716
717 # is it the same podroot?
718 $_ = <CACHE>;
719 chomp($_);
720 $tests++ if ($podroot eq $_);
721
722 # load the cache if its good
723 if ($tests != 2) {
724 close(CACHE);
54310121 725 return 0;
726 }
727
728 warn "loading item cache\n" if $verbose;
729 while (<CACHE>) {
730 /(.*?) (.*)$/;
731 $items{$1} = $2;
732 }
733 close(CACHE);
734
735 warn "scanning for directory cache\n" if $verbose;
736 open(CACHE, "<$dircache") ||
737 die "$0: error opening $dircache for reading: $!\n";
738 $/ = "\n";
739 $tests = 0;
740
741 # is it the same podpath?
742 $_ = <CACHE>;
743 chomp($_);
3e3baf6d 744 $tests++ if (join(":", @$podpath) eq $_);
54310121 745
746 # is it the same podroot?
747 $_ = <CACHE>;
748 chomp($_);
749 $tests++ if ($podroot eq $_);
750
751 # load the cache if its good
752 if ($tests != 2) {
753 close(CACHE);
54310121 754 return 0;
755 }
756
757 warn "loading directory cache\n" if $verbose;
758 while (<CACHE>) {
759 /(.*?) (.*)$/;
760 $pages{$1} = $2;
761 }
762
763 close(CACHE);
764
765 return 1;
766}
767
768#
769# scan_podpath - scans the directories specified in @podpath for directories,
770# .pod files, and .pm files. it also scans the pod files specified in
771# @libpods for =item directives.
772#
773sub scan_podpath {
3e3baf6d 774 my($podroot, $recurse, $append) = @_;
54310121 775 my($pwd, $dir);
776 my($libpod, $dirname, $pod, @files, @poddata);
777
3e3baf6d 778 unless($append) {
779 %items = ();
780 %pages = ();
781 }
782
54310121 783 # scan each directory listed in @podpath
784 $pwd = getcwd();
785 chdir($podroot)
786 || die "$0: error changing to directory $podroot: $!\n";
787 foreach $dir (@podpath) {
788 scan_dir($dir, $recurse);
789 }
790
791 # scan the pods listed in @libpods for =item directives
792 foreach $libpod (@libpods) {
793 # if the page isn't defined then we won't know where to find it
794 # on the system.
795 next unless defined $pages{$libpod} && $pages{$libpod};
796
797 # if there is a directory then use the .pod and .pm files within it.
29f227c9 798 # NOTE: Only finds the first so-named directory in the tree.
799# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
800 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 801 # find all the .pod and .pm files within the directory
802 $dirname = $1;
803 opendir(DIR, $dirname) ||
804 die "$0: error opening directory $dirname: $!\n";
fe6f1558 805 @files = grep(/(\.pod|\.pm)\z/ && ! -d $_, readdir(DIR));
54310121 806 closedir(DIR);
807
808 # scan each .pod and .pm file for =item directives
809 foreach $pod (@files) {
810 open(POD, "<$dirname/$pod") ||
811 die "$0: error opening $dirname/$pod for input: $!\n";
812 @poddata = <POD>;
813 close(POD);
2a28b791 814 clean_data( \@poddata );
54310121 815
2a28b791 816 scan_items( \%items, "$dirname/$pod", @poddata);
54310121 817 }
818
819 # use the names of files as =item directives too.
2a28b791 820### Don't think this should be done this way - confuses issues.(WL)
821### foreach $pod (@files) {
822### $pod =~ /^(.*)(\.pod|\.pm)$/;
823### $items{$1} = "$dirname/$1.html" if $1;
824### }
54310121 825 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
826 $pages{$libpod} =~ /([^:]*\.pm):/) {
827 # scan the .pod or .pm file for =item directives
828 $pod = $1;
829 open(POD, "<$pod") ||
830 die "$0: error opening $pod for input: $!\n";
831 @poddata = <POD>;
832 close(POD);
2a28b791 833 clean_data( \@poddata );
54310121 834
2a28b791 835 scan_items( \%items, "$pod", @poddata);
54310121 836 } else {
837 warn "$0: shouldn't be here (line ".__LINE__."\n";
838 }
839 }
840 @poddata = (); # clean-up a bit
841
842 chdir($pwd)
843 || die "$0: error changing to directory $pwd: $!\n";
844
845 # cache the item list for later use
846 warn "caching items for later use\n" if $verbose;
847 open(CACHE, ">$itemcache") ||
848 die "$0: error open $itemcache for writing: $!\n";
849
850 print CACHE join(":", @podpath) . "\n$podroot\n";
851 foreach my $key (keys %items) {
852 print CACHE "$key $items{$key}\n";
853 }
854
855 close(CACHE);
856
857 # cache the directory list for later use
858 warn "caching directories for later use\n" if $verbose;
859 open(CACHE, ">$dircache") ||
860 die "$0: error open $dircache for writing: $!\n";
861
862 print CACHE join(":", @podpath) . "\n$podroot\n";
863 foreach my $key (keys %pages) {
864 print CACHE "$key $pages{$key}\n";
865 }
866
867 close(CACHE);
868}
869
870#
871# scan_dir - scans the directory specified in $dir for subdirectories, .pod
872# files, and .pm files. notes those that it finds. this information will
873# be used later in order to figure out where the pages specified in L<>
874# links are on the filesystem.
875#
876sub scan_dir {
877 my($dir, $recurse) = @_;
878 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
879 local $_;
880
881 @subdirs = ();
882 @pods = ();
883
884 opendir(DIR, $dir) ||
885 die "$0: error opening directory $dir: $!\n";
886 while (defined($_ = readdir(DIR))) {
887 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
888 $pages{$_} = "" unless defined $pages{$_};
889 $pages{$_} .= "$dir/$_:";
890 push(@subdirs, $_);
fe6f1558 891 } elsif (/\.pod\z/) { # .pod
892 s/\.pod\z//;
54310121 893 $pages{$_} = "" unless defined $pages{$_};
894 $pages{$_} .= "$dir/$_.pod:";
895 push(@pods, "$dir/$_.pod");
22d4bb9c 896 } elsif (/\.html\z/) { # .html
897 s/\.html\z//;
898 $pages{$_} = "" unless defined $pages{$_};
899 $pages{$_} .= "$dir/$_.pod:";
fe6f1558 900 } elsif (/\.pm\z/) { # .pm
901 s/\.pm\z//;
54310121 902 $pages{$_} = "" unless defined $pages{$_};
903 $pages{$_} .= "$dir/$_.pm:";
904 push(@pods, "$dir/$_.pm");
905 }
906 }
907 closedir(DIR);
908
909 # recurse on the subdirectories if necessary
910 if ($recurse) {
911 foreach my $subdir (@subdirs) {
912 scan_dir("$dir/$subdir", $recurse);
913 }
914 }
915}
916
917#
918# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
919# build an index.
920#
921sub scan_headings {
922 my($sections, @data) = @_;
2a28b791 923 my($tag, $which_head, $otitle, $listdepth, $index);
54310121 924
be173d55 925 # here we need local $ignore = 0;
926 # unfortunately, we can't have it, because $ignore is lexical
927 $ignore = 0;
928
54310121 929 $listdepth = 0;
930 $index = "";
931
932 # scan for =head directives, note their name, and build an index
933 # pointing to each of them.
934 foreach my $line (@data) {
bb9460ed 935 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
2a28b791 936 ($tag, $which_head, $otitle) = ($1,$2,$3);
937
938 my $title = depod( $otitle );
939 my $name = htmlify( $title );
940 $$sections{$name} = 1;
941 $title = process_text( \$otitle );
54310121 942
102c538a 943 while ($which_head != $listdepth) {
944 if ($which_head > $listdepth) {
945 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
946 $listdepth++;
947 } elsif ($which_head < $listdepth) {
948 $listdepth--;
949 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
950 }
54310121 951 }
54310121 952
953 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
2a28b791 954 "<A HREF=\"#" . $name . "\">" .
955 $title . "</A></LI>";
54310121 956 }
957 }
958
959 # finish off the lists
960 while ($listdepth--) {
961 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
962 }
963
964 # get rid of bogus lists
965 $index =~ s,\t*<UL>\s*</UL>\n,,g;
966
bb9460ed 967 $ignore = 1; # restore old value;
be173d55 968
54310121 969 return $index;
970}
971
972#
973# scan_items - scans the pod specified by $pod for =item directives. we
974# will use this information later on in resolving C<> links.
975#
976sub scan_items {
2a28b791 977 my( $itemref, $pod, @poddata ) = @_;
54310121 978 my($i, $item);
979 local $_;
980
fe6f1558 981 $pod =~ s/\.pod\z//;
54310121 982 $pod .= ".html" if $pod;
983
984 foreach $i (0..$#poddata) {
2a28b791 985 my $txt = depod( $poddata[$i] );
986
987 # figure out what kind of item it is.
988 # Build string for referencing this item.
989 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
990 next unless $1;
991 $item = $1;
992 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
993 $item = $1;
994 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
995 $item = $1;
996 } else {
997 next;
54310121 998 }
2a28b791 999 my $fid = fragment_id( $item );
1000 $$itemref{$fid} = "$pod" if $fid;
54310121 1001 }
1002}
1003
1004#
1005# process_head - convert a pod head[1-6] tag and convert it to HTML format.
1006#
1007sub process_head {
2a28b791 1008 my($tag, $heading, $hasindex) = @_;
54310121 1009
1010 # figure out the level of the =head
1011 $tag =~ /head([1-6])/;
1012 my $level = $1;
1013
2a28b791 1014 if( $listlevel ){
1015 warn "$0: $podfile: unterminated list at =head in paragraph $paragraph. ignoring.\n";
1016 while( $listlevel ){
1017 process_back();
1018 }
1019 }
1020
1021 print HTML "<P>\n";
1022 if( $level == 1 && ! $top ){
0e4548d5 1023 print HTML "<A HREF=\"#__index__\"><SMALL>$backlink</SMALL></A>\n"
1024 if $hasindex and $backlink;
2a28b791 1025 print HTML "<HR>\n"
1026 }
1027
1028 my $name = htmlify( depod( $heading ) );
1029 my $convert = process_text( \$heading );
1030 print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
54310121 1031}
1032
2a28b791 1033
54310121 1034#
2a28b791 1035# emit_item_tag - print an =item's text
1036# Note: The global $EmittedItem is used for inhibiting self-references.
54310121 1037#
2a28b791 1038my $EmittedItem;
1039
1040sub emit_item_tag($$$){
1041 my( $otext, $text, $compact ) = @_;
1042 my $item = fragment_id( $text );
54310121 1043
2a28b791 1044 $EmittedItem = $item;
1045 ### print STDERR "emit_item_tag=$item ($text)\n";
54310121 1046
2a28b791 1047 print HTML '<STRONG>';
1048 if ($items_named{$item}++) {
1049 print HTML process_text( \$otext );
1050 } else {
1051 my $name = 'item_' . $item;
1052 print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';
1053 }
1054 print HTML "</STRONG><BR>\n";
1055 undef( $EmittedItem );
1056}
1057
1058sub emit_li {
1059 my( $tag ) = @_;
1060 if( $items_seen[$listlevel]++ == 0 ){
1061 push( @listend, "</$tag>" );
1062 print HTML "<$tag>\n";
1063 }
1064 print HTML $tag eq 'DL' ? '<DT>' : '<LI>';
1065}
1066
1067#
1068# process_item - convert a pod item tag and convert it to HTML format.
1069#
1070sub process_item {
1071 my( $otext ) = @_;
54310121 1072
1073 # lots of documents start a list without doing an =over. this is
1074 # bad! but, the proper thing to do seems to be to just assume
1075 # they did do an =over. so warn them once and then continue.
2a28b791 1076 if( $listlevel == 0 ){
1077 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n";
1078 process_over();
1079 }
54310121 1080
2a28b791 1081 # formatting: insert a paragraph if preceding item has >1 paragraph
1082 if( $after_lpar ){
1083 print HTML "<P></P>\n";
1084 $after_lpar = 0;
1085 }
54310121 1086
1087 # remove formatting instructions from the text
2a28b791 1088 my $text = depod( $otext );
1089
1090 # all the list variants:
1091 if( $text =~ /\A\*/ ){ # bullet
1092 emit_li( 'UL' );
1093 if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1094 my $tag = $1;
1095 $otext =~ s/\A\*\s+//;
1096 emit_item_tag( $otext, $tag, 1 );
54310121 1097 }
1098
2a28b791 1099 } elsif( $text =~ /\A\d+/ ){ # numbered list
1100 emit_li( 'OL' );
1101 if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1102 my $tag = $1;
1103 $otext =~ s/\A\d+\.?\s*//;
1104 emit_item_tag( $otext, $tag, 1 );
7b8d334a 1105 }
54310121 1106
2a28b791 1107 } else { # definition list
1108 emit_li( 'DL' );
1109 if ($text =~ /\A(.+)\Z/s ){ # should have text
1110 emit_item_tag( $otext, $text, 1 );
7b8d334a 1111 }
54310121 1112 print HTML '<DD>';
1113 }
54310121 1114 print HTML "\n";
1115}
1116
1117#
2a28b791 1118# process_over - process a pod over tag and start a corresponding HTML list.
54310121 1119#
1120sub process_over {
1121 # start a new list
1122 $listlevel++;
2a28b791 1123 push( @items_seen, 0 );
1124 $after_lpar = 0;
54310121 1125}
1126
1127#
1128# process_back - process a pod back tag and convert it to HTML format.
1129#
1130sub process_back {
2a28b791 1131 if( $listlevel == 0 ){
1132 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n";
1133 return;
1134 }
54310121 1135
1136 # close off the list. note, I check to see if $listend[$listlevel] is
1137 # defined because an =item directive may have never appeared and thus
1138 # $listend[$listlevel] may have never been initialized.
1139 $listlevel--;
2a28b791 1140 if( defined $listend[$listlevel] ){
1141 print HTML '<P></P>' if $after_lpar;
1142 print HTML $listend[$listlevel];
1143 print HTML "\n";
1144 pop( @listend );
1145 }
1146 $after_lpar = 0;
54310121 1147
2a28b791 1148 # clean up item count
1149 pop( @items_seen );
54310121 1150}
1151
1152#
2a28b791 1153# process_cut - process a pod cut tag, thus start ignoring pod directives.
54310121 1154#
1155sub process_cut {
1156 $ignore = 1;
1157}
1158
1159#
2a28b791 1160# process_pod - process a pod pod tag, thus stop ignoring pod directives
1161# until we see a corresponding cut.
54310121 1162#
1163sub process_pod {
1164 # no need to set $ignore to 0 cause the main loop did it
1165}
1166
1167#
2a28b791 1168# process_for - process a =for pod tag. if it's for html, spit
c4d9b39d 1169# it out verbatim, if illustration, center it, otherwise ignore it.
54310121 1170#
1171sub process_for {
1172 my($whom, $text) = @_;
1173 if ( $whom =~ /^(pod2)?html$/i) {
1174 print HTML $text;
c4d9b39d 1175 } elsif ($whom =~ /^illustration$/i) {
1176 1 while chomp $text;
1177 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1178 $text .= $ext, last if -r "$text$ext";
1179 }
1180 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1181 }
54310121 1182}
1183
1184#
1185# process_begin - process a =begin pod tag. this pushes
1186# whom we're beginning on the begin stack. if there's a
1187# begin stack, we only print if it us.
1188#
1189sub process_begin {
1190 my($whom, $text) = @_;
1191 $whom = lc($whom);
1192 push (@begin_stack, $whom);
1193 if ( $whom =~ /^(pod2)?html$/) {
1194 print HTML $text if $text;
1195 }
1196}
1197
1198#
1199# process_end - process a =end pod tag. pop the
1200# begin stack. die if we're mismatched.
1201#
1202sub process_end {
1203 my($whom, $text) = @_;
1204 $whom = lc($whom);
1205 if ($begin_stack[-1] ne $whom ) {
1206 die "Unmatched begin/end at chunk $paragraph\n"
1207 }
2a28b791 1208 pop( @begin_stack );
54310121 1209}
1210
1211#
2a28b791 1212# process_pre - indented paragraph, made into <PRE></PRE>
54310121 1213#
2a28b791 1214sub process_pre {
1215 my( $text ) = @_;
1216 my( $rest );
54310121 1217 return if $ignore;
1218
54310121 1219 $rest = $$text;
1220
2a28b791 1221 # insert spaces in place of tabs
1222 $rest =~ s#.*#
be173d55 1223 my $line = $&;
1224 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1225 $line;
1226 #eg;
54310121 1227
2a28b791 1228 # convert some special chars to HTML escapes
1229 $rest =~ s/&/&amp;/g;
1230 $rest =~ s/</&lt;/g;
1231 $rest =~ s/>/&gt;/g;
1232 $rest =~ s/"/&quot;/g;
1233
1234 # try and create links for all occurrences of perl.* within
1235 # the preformatted text.
1236 $rest =~ s{
1237 (\s*)(perl\w+)
1238 }{
1239 if ( defined $pages{$2} ){ # is a link
1240 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1241 } elsif (defined $pages{dosify($2)}) { # is a link
1242 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1243 } else {
1244 "$1$2";
1245 }
1246 }xeg;
1247 $rest =~ s{
1248 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1249 }{
1250 my $url ;
1251 if ( $htmlfileurl ne '' ){
1252 # Here, we take advantage of the knowledge
1253 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1254 # Since $htmlroot eq '', we need to prepend $htmldir
1255 # on the fron of the link to get the absolute path
1256 # of the link's target. We check for a leading '/'
1257 # to avoid corrupting links that are #, file:, etc.
1258 my $old_url = $3 ;
1259 $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1260 $url = relativize_url( "$old_url.html", $htmlfileurl );
1261 } else {
1262 $url = "$3.html" ;
1263 }
1264 "$1$url" ;
1265 }xeg;
1266
1267 # Look for embedded URLs and make them into links. We don't
1268 # relativize them since they are best left as the author intended.
1269
1270 my $urls = '(' . join ('|', qw{
54310121 1271 http
1272 telnet
1273 mailto
1274 news
1275 gopher
1276 file
1277 wais
1278 ftp
1279 } )
1280 . ')';
1281
2a28b791 1282 my $ltrs = '\w';
1283 my $gunk = '/#~:.?+=&%@!\-';
1284 my $punc = '.:?\-';
1285 my $any = "${ltrs}${gunk}${punc}";
54310121 1286
2a28b791 1287 $rest =~ s{
54310121 1288 \b # start at word boundary
1289 ( # begin $1 {
29f227c9 1290 $urls : # need resource and a colon
1291 (?!:) # Ignore File::, among others.
54310121 1292 [$any] +? # followed by on or more
1293 # of any valid character, but
1294 # be conservative and take only
1295 # what you need to....
1296 ) # end $1 }
1297 (?= # look-ahead non-consumptive assertion
1298 [$punc]* # either 0 or more puntuation
1299 [^$any] # followed by a non-url char
1300 | # or else
1301 $ # then end of the string
1302 )
1303 }{<A HREF="$1">$1</A>}igox;
1304
2a28b791 1305 # text should be as it is (verbatim)
1306 $$text = $rest;
1307}
54310121 1308
54310121 1309
2a28b791 1310#
1311# pure text processing
1312#
1313# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1314# we don't want this to happen within IS
1315#
1316sub pure_text($){
1317 my $text = shift();
1318 process_puretext( $text, \$ptQuote, 1 );
54310121 1319}
1320
2a28b791 1321sub inIS_text($){
1322 my $text = shift();
1323 process_puretext( $text, \$ptQuote, 0 );
1324}
54310121 1325
1326#
1327# process_puretext - process pure text (without pod-escapes) converting
1328# double-quotes and handling implicit C<> links.
1329#
1330sub process_puretext {
2a28b791 1331 my($text, $quote, $notinIS) = @_;
54310121 1332
2a28b791 1333 ## Guessing at func() or [$@%&]*var references in plain text is destined
1334 ## to produce some strange looking ref's. uncomment to disable:
1335 ## $notinIS = 0;
1336
1337 my(@words, $lead, $trail);
54310121 1338
2a28b791 1339 # convert double-quotes to single-quotes
1340 if( $$quote && $text =~ s/"/''/s ){
1341 $$quote = 0;
1342 }
1343 while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1344 $$quote = 1 if $text =~ s/"/``/s;
54310121 1345
1346 # keep track of leading and trailing white-space
2a28b791 1347 $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
1348 $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
54310121 1349
2a28b791 1350 # split at space/non-space boundaries
1351 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
54310121 1352
1353 # process each word individually
1354 foreach my $word (@words) {
2a28b791 1355 # skip space runs
1356 next if $word =~ /^\s*$/;
54310121 1357 # see if we can infer a link
02369fa5 1358 if( $notinIS && $word =~ /^(\w+)\((.*)\)$/ ) {
54310121 1359 # has parenthesis so should have been a C<> ref
2a28b791 1360 ## try for a pagename (perlXXX(1))?
02369fa5 1361 my( $func, $args ) = ( $1, $2 );
1362 if( $args =~ /^\d+$/ ){
2a28b791 1363 my $url = page_sect( $word, '' );
1364 if( defined $url ){
1365 $word = "<A HREF=\"$url\">the $word manpage</A>";
1366 next;
1367 }
1368 }
02369fa5 1369 ## try function name for a link, append tt'ed argument list
1370 $word = emit_C( $func, '', "($args)");
2a28b791 1371
1372#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1373## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1374## # perl variables, should be a C<> ref
1375## $word = emit_C( $word );
1376
54310121 1377 } elsif ($word =~ m,^\w+://\w,) {
1378 # looks like a URL
5a039dd3 1379 # Don't relativize it: leave it as the author intended
54310121 1380 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1381 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1382 # looks like an e-mail address
7b8d334a 1383 my ($w1, $w2, $w3) = ("", $word, "");
1384 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1385 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1386 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1387 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1388 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1389 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1390 } else {
7b8d334a 1391 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1392 }
1393 }
1394
2a28b791 1395 # put everything back together
1396 return $lead . join( '', @words ) . $trail;
1397}
1398
54310121 1399
2a28b791 1400#
1401# process_text - handles plaintext that appears in the input pod file.
1402# there may be pod commands embedded within the text so those must be
1403# converted to html commands.
1404#
7ba65c74 1405
c68ea5d1 1406sub process_text1($$;$$);
1407sub pattern ($) { $_[0] ? '[^\S\n]+'.('>' x ($_[0] + 1)) : '>' }
1408sub closing ($) { local($_) = shift; (defined && s/\s+$//) ? length : 0 }
7ba65c74 1409
2a28b791 1410sub process_text {
1411 return if $ignore;
1412 my( $tref ) = @_;
1413 my $res = process_text1( 0, $tref );
1414 $$tref = $res;
1415}
1416
c68ea5d1 1417sub process_text1($$;$$){
1418 my( $lev, $rstr, $func, $closing ) = @_;
2a28b791 1419 my $res = '';
1420
60a48b2d 1421 unless (defined $func) {
1422 $func = '';
1423 $lev++;
1424 }
1425
2a28b791 1426 if( $func eq 'B' ){
1427 # B<text> - boldface
1428 $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
1429
1430 } elsif( $func eq 'C' ){
1431 # C<code> - can be a ref or <CODE></CODE>
1432 # need to extract text
c68ea5d1 1433 my $par = go_ahead( $rstr, 'C', $closing );
2a28b791 1434
1435 ## clean-up of the link target
1436 my $text = depod( $par );
1437
1438 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1439 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1440
1441 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1442
1443 } elsif( $func eq 'E' ){
1444 # E<x> - convert to character
4b19af01 1445 $$rstr =~ s/^([^>]*)>//;
1446 my $escape = $1;
1447 $escape =~ s/^(\d+|X[\dA-F]+)$/#$1/i;
1448 $res = "&$escape;";
2a28b791 1449
1450 } elsif( $func eq 'F' ){
1451 # F<filename> - italizice
1452 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1453
1454 } elsif( $func eq 'I' ){
1455 # I<text> - italizice
1456 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1457
1458 } elsif( $func eq 'L' ){
1459 # L<link> - link
1460 ## L<text|cross-ref> => produce text, use cross-ref for linking
1461 ## L<cross-ref> => make text from cross-ref
1462 ## need to extract text
c68ea5d1 1463 my $par = go_ahead( $rstr, 'L', $closing );
2a28b791 1464
1465 # some L<>'s that shouldn't be:
1466 # a) full-blown URL's are emitted as-is
1467 if( $par =~ m{^\w+://}s ){
1468 return make_URL_href( $par );
1469 }
1470 # b) C<...> is stripped and treated as C<>
1471 if( $par =~ /^C<(.*)>$/ ){
1472 my $text = depod( $1 );
1473 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1474 }
1475
1476 # analyze the contents
1477 $par =~ s/\n/ /g; # undo word-wrapped tags
1478 my $opar = $par;
1479 my $linktext;
1480 if( $par =~ s{^([^|]+)\|}{} ){
1481 $linktext = $1;
1482 }
1483
1484 # make sure sections start with a /
1485 $par =~ s{^"}{/"};
1486
1487 my( $page, $section, $ident );
1488
1489 # check for link patterns
1490 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
1491 # we've got a name/ident (no quotes)
1492 ( $page, $ident ) = ( $1, $2 );
1493 ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1494
1495 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1496 # even though this should be a "section", we go for ident first
1497 ( $page, $ident ) = ( $1, $2 );
1498 ### print STDERR "--> L<$par> to page $page, section $section\n";
1499
1500 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
1501 ( $page, $section ) = ( '', $par );
1502 ### print STDERR "--> L<$par> to void page, section $section\n";
1503
1504 } else {
1505 ( $page, $section ) = ( $par, '' );
1506 ### print STDERR "--> L<$par> to page $par, void section\n";
1507 }
1508
1509 # now, either $section or $ident is defined. the convoluted logic
1510 # below tries to resolve L<> according to what the user specified.
1511 # failing this, we try to find the next best thing...
1512 my( $url, $ltext, $fid );
1513
1514 RESOLVE: {
1515 if( defined $ident ){
1516 ## try to resolve $ident as an item
1517 ( $url, $fid ) = coderef( $page, $ident );
1518 if( $url ){
1519 if( ! defined( $linktext ) ){
1520 $linktext = $ident;
1521 $linktext .= " in " if $ident && $page;
1522 $linktext .= "the $page manpage" if $page;
1523 }
1524 ### print STDERR "got coderef url=$url\n";
1525 last RESOLVE;
1526 }
1527 ## no luck: go for a section (auto-quoting!)
1528 $section = $ident;
1529 }
1530 ## now go for a section
1531 my $htmlsection = htmlify( $section );
1532 $url = page_sect( $page, $htmlsection );
1533 if( $url ){
1534 if( ! defined( $linktext ) ){
1535 $linktext = $section;
1536 $linktext .= " in " if $section && $page;
1537 $linktext .= "the $page manpage" if $page;
1538 }
1539 ### print STDERR "got page/section url=$url\n";
1540 last RESOLVE;
1541 }
1542 ## no luck: go for an ident
1543 if( $section ){
1544 $ident = $section;
1545 } else {
1546 $ident = $page;
1547 $page = undef();
1548 }
1549 ( $url, $fid ) = coderef( $page, $ident );
1550 if( $url ){
1551 if( ! defined( $linktext ) ){
1552 $linktext = $ident;
1553 $linktext .= " in " if $ident && $page;
1554 $linktext .= "the $page manpage" if $page;
1555 }
1556 ### print STDERR "got section=>coderef url=$url\n";
1557 last RESOLVE;
1558 }
1559
1560 # warning; show some text.
1561 $linktext = $opar unless defined $linktext;
1562 warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1563 }
1564
1565 # now we have an URL or just plain code
1566 $$rstr = $linktext . '>' . $$rstr;
1567 if( defined( $url ) ){
1568 $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
1569 } else {
1570 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1571 }
1572
1573 } elsif( $func eq 'S' ){
1574 # S<text> - non-breaking spaces
1575 $res = process_text1( $lev, $rstr );
1576 $res =~ s/ /&nbsp;/g;
1577
1578 } elsif( $func eq 'X' ){
1579 # X<> - ignore
1580 $$rstr =~ s/^[^>]*>//;
1581
1582 } elsif( $func eq 'Z' ){
1583 # Z<> - empty
1584 warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1585 unless $$rstr =~ s/^>//;
1586
1587 } else {
c68ea5d1 1588 my $term = pattern $closing;
1589 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)//s ){
2a28b791 1590 # all others: either recurse into new function or
c68ea5d1 1591 # terminate at closing angle bracket(s)
2a28b791 1592 my $pt = $1;
c68ea5d1 1593 $pt .= $2 if !$3 && $lev == 1;
2a28b791 1594 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
c68ea5d1 1595 return $res if !$3 && $lev > 1;
1596 if( $3 ){
1597 $res .= process_text1( $lev, $rstr, $3, closing $4 );
1598 }
2a28b791 1599 }
1600 if( $lev == 1 ){
1601 $res .= pure_text( $$rstr );
54310121 1602 } else {
2a28b791 1603 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
54310121 1604 }
1605 }
2a28b791 1606 return $res;
1607}
54310121 1608
2a28b791 1609#
1610# go_ahead: extract text of an IS (can be nested)
1611#
c68ea5d1 1612sub go_ahead($$$){
1613 my( $rstr, $func, $closing ) = @_;
2a28b791 1614 my $res = '';
c68ea5d1 1615 my @closing = ($closing);
1616 while( $$rstr =~
1617 s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|@{[pattern $closing[0]]})//s ){
2a28b791 1618 $res .= $1;
c68ea5d1 1619 unless( $3 ){
1620 shift @closing;
1621 return $res unless @closing;
2a28b791 1622 } else {
c68ea5d1 1623 unshift @closing, closing $4;
2a28b791 1624 }
1625 $res .= $2;
1626 }
1627 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1628 return $res;
54310121 1629}
1630
1631#
2a28b791 1632# emit_C - output result of C<text>
1633# $text is the depod-ed text
54310121 1634#
02369fa5 1635sub emit_C($;$$){
1636 my( $text, $nocode, $args ) = @_;
60a48b2d 1637 $args = '' unless defined $args;
2a28b791 1638 my $res;
1639 my( $url, $fid ) = coderef( undef(), $text );
1640
1641 # need HTML-safe text
02369fa5 1642 my $linktext = html_escape( "$text$args" );
2a28b791 1643
1644 if( defined( $url ) &&
1645 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1646 $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>";
1647 } elsif( 0 && $nocode ){
1648 $res = $linktext;
1649 } else {
1650 $res = "<CODE>$linktext</CODE>";
1651 }
1652 return $res;
54310121 1653}
1654
1655#
2a28b791 1656# html_escape: make text safe for HTML
1657#
1658sub html_escape {
1659 my $rest = $_[0];
1660 $rest =~ s/&/&amp;/g;
1661 $rest =~ s/</&lt;/g;
1662 $rest =~ s/>/&gt;/g;
1663 $rest =~ s/"/&quot;/g;
1664 return $rest;
1665}
1666
1667
1668#
39e571d4 1669# dosify - convert filenames to 8.3
1670#
1671sub dosify {
1672 my($str) = @_;
fe4c6be1 1673 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4 1674 if ($Is83) {
1675 $str = lc $str;
1676 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1677 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1678 }
1679 return $str;
1680}
1681
1682#
2a28b791 1683# page_sect - make an URL from the text of a L<>
54310121 1684#
2a28b791 1685sub page_sect($$) {
1686 my( $page, $section ) = @_;
1687 my( $linktext, $page83, $link); # work strings
1688
1689 # check if we know that this is a section in this page
1690 if (!defined $pages{$page} && defined $sections{$page}) {
1691 $section = $page;
1692 $page = "";
1693 ### print STDERR "reset page='', section=$section\n";
54310121 1694 }
1695
39e571d4 1696 $page83=dosify($page);
1697 $page=$page83 if (defined $pages{$page83});
54310121 1698 if ($page eq "") {
2a28b791 1699 $link = "#" . htmlify( $section );
350ccacd 1700 } elsif ( $page =~ /::/ ) {
350ccacd 1701 $page =~ s,::,/,g;
29f227c9 1702 # Search page cache for an entry keyed under the html page name,
1703 # then look to see what directory that page might be in. NOTE:
1704 # this will only find one page. A better solution might be to produce
1705 # an intermediate page that is an index to all such pages.
1706 my $page_name = $page ;
fe6f1558 1707 $page_name =~ s,^.*/,,s ;
29f227c9 1708 if ( defined( $pages{ $page_name } ) &&
1709 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1710 ) {
1711 $page = $1 ;
1712 }
1713 else {
1714 # NOTE: This branch assumes that all A::B pages are located in
1715 # $htmlroot/A/B.html . This is often incorrect, since they are
1716 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1717 # analyze the contents of %pages and figure out where any
1718 # cousins of A::B are, then assume that. So, if A::B isn't found,
1719 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1720 # lib/A/B.pm. This is also limited, but it's an improvement.
1721 # Maybe a hints file so that the links point to the correct places
2a28b791 1722 # nonetheless?
1723
29f227c9 1724 }
350ccacd 1725 $link = "$htmlroot/$page.html";
2a28b791 1726 $link .= "#" . htmlify( $section ) if ($section);
54310121 1727 } elsif (!defined $pages{$page}) {
54310121 1728 $link = "";
54310121 1729 } else {
2a28b791 1730 $section = htmlify( $section ) if $section ne "";
1731 ### print STDERR "...section=$section\n";
54310121 1732
1733 # if there is a directory by the name of the page, then assume that an
1734 # appropriate section will exist in the subdirectory
29f227c9 1735# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1736 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 1737 $link = "$htmlroot/$1/$section.html";
2a28b791 1738 ### print STDERR "...link=$link\n";
54310121 1739
1740 # since there is no directory by the name of the page, the section will
1741 # have to exist within a .html of the same name. thus, make sure there
1742 # is a .pod or .pm that might become that .html
1743 } else {
2a28b791 1744 $section = "#$section" if $section;
1745 ### print STDERR "...section=$section\n";
1746
54310121 1747 # check if there is a .pod with the page name
1748 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1749 $link = "$htmlroot/$1.html$section";
1750 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1751 $link = "$htmlroot/$1.html$section";
1752 } else {
54310121 1753 $link = "";
54310121 1754 }
1755 }
1756 }
1757
54310121 1758 if ($link) {
29f227c9 1759 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1760 # implies $htmlroot eq ''. This means that the link in question
1761 # needs a prefix of $htmldir if it begins with '/'. The test for
1762 # the initial '/' is done to avoid '#'-only links, and to allow
1763 # for other kinds of links, like file:, ftp:, etc.
1764 my $url ;
1765 if ( $htmlfileurl ne '' ) {
fe6f1558 1766 $link = "$htmldir$link" if $link =~ m{^/}s;
2a28b791 1767 $url = relativize_url( $link, $htmlfileurl );
1768# print( " b: [$link,$htmlfileurl,$url]\n" );
29f227c9 1769 }
1770 else {
1771 $url = $link ;
1772 }
2a28b791 1773 return $url;
29f227c9 1774
54310121 1775 } else {
2a28b791 1776 return undef();
54310121 1777 }
54310121 1778}
1779
1780#
29f227c9 1781# relativize_url - convert an absolute URL to one relative to a base URL.
1782# Assumes both end in a filename.
1783#
1784sub relativize_url {
1785 my ($dest,$source) = @_ ;
1786
1787 my ($dest_volume,$dest_directory,$dest_file) =
1788 File::Spec::Unix->splitpath( $dest ) ;
1789 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1790
1791 my ($source_volume,$source_directory,$source_file) =
1792 File::Spec::Unix->splitpath( $source ) ;
1793 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1794
1795 my $rel_path = '' ;
1796 if ( $dest ne '' ) {
1797 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1798 }
1799
1800 if ( $rel_path ne '' &&
1801 substr( $rel_path, -1 ) ne '/' &&
1802 substr( $dest_file, 0, 1 ) ne '#'
1803 ) {
1804 $rel_path .= "/$dest_file" ;
1805 }
1806 else {
1807 $rel_path .= "$dest_file" ;
1808 }
1809
1810 return $rel_path ;
1811}
1812
54310121 1813
1814#
2a28b791 1815# coderef - make URL from the text of a C<>
54310121 1816#
2a28b791 1817sub coderef($$){
1818 my( $page, $item ) = @_;
1819 my( $url );
1820
1821 my $fid = fragment_id( $item );
2a28b791 1822 if( defined( $page ) ){
1823 # we have been given a $page...
1824 $page =~ s{::}{/}g;
1825
1826 # Do we take it? Item could be a section!
228a48a5 1827 my $base = $items{$fid} || "";
2a28b791 1828 $base =~ s{[^/]*/}{};
1829 if( $base ne "$page.html" ){
1830 ### print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1831 $page = undef();
1832 }
54310121 1833
2a28b791 1834 } else {
1835 # no page - local items precede cached items
67398a75 1836 if( defined( $fid ) ){
1837 if( exists $local_items{$fid} ){
1838 $page = $local_items{$fid};
1839 } else {
1840 $page = $items{$fid};
1841 }
2a28b791 1842 }
1843 }
54310121 1844
1845 # if there was a pod file that we found earlier with an appropriate
1846 # =item directive, then create a link to that page.
2a28b791 1847 if( defined $page ){
1848 if( $page ){
228a48a5 1849 if( exists $pages{$page} and $pages{$page} =~ /([^:.]*)\.[^:]*:/){
2a28b791 1850 $page = $1 . '.html';
29f227c9 1851 }
2a28b791 1852 my $link = "$htmlroot/$page#item_$fid";
54310121 1853
2a28b791 1854 # Here, we take advantage of the knowledge that $htmlfileurl
1855 # ne '' implies $htmlroot eq ''.
1856 if ( $htmlfileurl ne '' ) {
1857 $link = "$htmldir$link" ;
1858 $url = relativize_url( $link, $htmlfileurl ) ;
1859 } else {
1860 $url = $link ;
1861 }
1862 } else {
1863 $url = "#item_" . $fid;
1864 }
54310121 1865
2a28b791 1866 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1867 }
1868 return( $url, $fid );
54310121 1869}
1870
54310121 1871
1872
1873#
29f227c9 1874# Adapted from Nick Ing-Simmons' PodToHtml package.
1875sub relative_url {
1876 my $source_file = shift ;
1877 my $destination_file = shift;
1878
1879 my $source = URI::file->new_abs($source_file);
1880 my $uo = URI::file->new($destination_file,$source)->abs;
1881 return $uo->rel->as_string;
1882}
1883
1884
1885#
54310121 1886# finish_list - finish off any pending HTML lists. this should be called
1887# after the entire pod file has been read and converted.
1888#
1889sub finish_list {
7b8d334a 1890 while ($listlevel > 0) {
54310121 1891 print HTML "</DL>\n";
1892 $listlevel--;
1893 }
1894}
1895
1896#
1897# htmlify - converts a pod section specification to a suitable section
2a28b791 1898# specification for HTML. Note that we keep spaces and special characters
1899# except ", ? (Netscape problem) and the hyphen (writer's problem...).
54310121 1900#
1901sub htmlify {
2a28b791 1902 my( $heading) = @_;
1903 $heading =~ s/(\s+)/ /g;
1904 $heading =~ s/\s+\Z//;
1905 $heading =~ s/\A\s+//;
1906 # The hyphen is a disgrace to the English language.
1907 $heading =~ s/[-"?]//g;
1908 $heading = lc( $heading );
1909 return $heading;
1910}
54310121 1911
2a28b791 1912#
1913# depod - convert text by eliminating all interior sequences
1914# Note: can be called with copy or modify semantics
1915#
1916my %E2c;
67398a75 1917$E2c{lt} = '<';
1918$E2c{gt} = '>';
1919$E2c{sol} = '/';
2a28b791 1920$E2c{verbar} = '|';
67398a75 1921$E2c{amp} = '&'; # in Tk's pods
2a28b791 1922
c68ea5d1 1923sub depod1($;$$);
7ba65c74 1924
2a28b791 1925sub depod($){
1926 my $string;
1927 if( ref( $_[0] ) ){
1928 $string = ${$_[0]};
1929 ${$_[0]} = depod1( \$string );
1930 } else {
1931 $string = $_[0];
1932 depod1( \$string );
1933 }
1934}
54310121 1935
c68ea5d1 1936sub depod1($;$$){
1937 my( $rstr, $func, $closing ) = @_;
2a28b791 1938 my $res = '';
228a48a5 1939 return $res unless defined $$rstr;
2a28b791 1940 if( ! defined( $func ) ){
1941 # skip to next begin of an interior sequence
c68ea5d1 1942 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<(<+[^\S\n]+)?// ){
2a28b791 1943 # recurse into its text
c68ea5d1 1944 $res .= $1 . depod1( $rstr, $2, closing $3);
2a28b791 1945 }
1946 $res .= $$rstr;
1947 } elsif( $func eq 'E' ){
1948 # E<x> - convert to character
4b19af01 1949 $$rstr =~ s/^([^>]*)>//;
228a48a5 1950 $res .= $E2c{$1} || "";
2a28b791 1951 } elsif( $func eq 'X' ){
1952 # X<> - ignore
1953 $$rstr =~ s/^[^>]*>//;
1954 } elsif( $func eq 'Z' ){
1955 # Z<> - empty
1956 $$rstr =~ s/^>//;
1957 } else {
1958 # all others: either recurse into new function or
1959 # terminate at closing angle bracket
c68ea5d1 1960 my $term = pattern $closing;
1961 while( $$rstr =~ s/\A(.*?)(([BCEFILSXZ])<(<+[^\S\n]+)?|$term)// ){
2a28b791 1962 $res .= $1;
c68ea5d1 1963 last unless $3;
1964 $res .= depod1( $rstr, $3, closing $4 );
2a28b791 1965 }
1966 ## If we're here and $2 ne '>': undelimited interior sequence.
1967 ## Ignored, as this is called without proper indication of where we are.
1968 ## Rely on process_text to produce diagnostics.
1969 }
1970 return $res;
1971}
54310121 1972
2a28b791 1973#
1974# fragment_id - construct a fragment identifier from:
1975# a) =item text
1976# b) contents of C<...>
1977#
1978my @hc;
1979sub fragment_id {
1980 my $text = shift();
1981 $text =~ s/\s+\Z//s;
1982 if( $text ){
1983 # a method or function?
1984 return $1 if $text =~ /(\w+)\s*\(/;
1985 return $1 if $text =~ /->\s*(\w+)\s*\(?/;
1986
1987 # a variable name?
1988 return $1 if $text =~ /^([$@%*]\S+)/;
1989
1990 # some pattern matching operator?
1991 return $1 if $text =~ m|^(\w+/).*/\w*$|;
1992
1993 # fancy stuff... like "do { }"
1994 return $1 if $text =~ m|^(\w+)\s*{.*}$|;
1995
1996 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
1997 # and some funnies with ... Module ...
1998 return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
1999 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
2000
2001 # text? normalize!
2002 $text =~ s/\s+/_/sg;
2003 $text =~ s{(\W)}{
2004 defined( $hc[ord($1)] ) ? $hc[ord($1)]
2005 : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
2006 $text = substr( $text, 0, 50 );
2007 } else {
2008 return undef();
2009 }
54310121 2010}
2011
2a28b791 2012#
2013# make_URL_href - generate HTML href from URL
2014# Special treatment for CGI queries.
2015#
2016sub make_URL_href($){
2017 my( $url ) = @_;
2018 if( $url !~
228a48a5 2019 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<A HREF="$1$2">$1</A>}i ){
2a28b791 2020 $url = "<A HREF=\"$url\">$url</A>";
2021 }
2022 return $url;
54310121 2023}
2024
20251;