heavy cleanup of Pod::Html bug fixes (from Wolfgang Laun
[p5sagit/p5-mst-13.2.git] / lib / Pod / Html.pm
CommitLineData
54310121 1package Pod::Html;
2
3use Pod::Functions;
4use Getopt::Long; # package for handling command-line parameters
29f227c9 5use File::Spec::Unix;
54310121 6require Exporter;
7b8d334a 7use vars qw($VERSION);
2a28b791 8$VERSION = 1.03;
54310121 9@ISA = Exporter;
10@EXPORT = qw(pod2html htmlify);
11use Cwd;
12
13use Carp;
14
3ec07288 15use locale; # make \w work right in non-ASCII lands
16
54310121 17use strict;
18
7b8d334a 19use Config;
20
54310121 21=head1 NAME
22
7b8d334a 23Pod::Html - module to convert pod files to HTML
54310121 24
25=head1 SYNOPSIS
26
7b8d334a 27 use Pod::Html;
54310121 28 pod2html([options]);
29
30=head1 DESCRIPTION
31
32Converts files from pod format (see L<perlpod>) to HTML format. It
33can automatically generate indexes and cross-references, and it keeps
34a cache of things it knows how to cross-reference.
35
36=head1 ARGUMENTS
37
38Pod::Html takes the following arguments:
39
40=over 4
41
42=item help
43
44 --help
45
46Displays the usage message.
47
5a039dd3 48=item htmldir
49
50 --htmldir=name
51
52Sets the directory in which the resulting HTML file is placed. This
29f227c9 53is used to generate relative links to other files. Not passing this
54causes all links to be absolute, since this is the value that tells
55Pod::Html the root of the documentation tree.
5a039dd3 56
54310121 57=item htmlroot
58
59 --htmlroot=name
60
61Sets the base URL for the HTML files. When cross-references are made,
62the HTML root is prepended to the URL.
63
64=item infile
65
66 --infile=name
67
68Specify the pod file to convert. Input is taken from STDIN if no
69infile is specified.
70
71=item outfile
72
73 --outfile=name
74
75Specify the HTML file to create. Output goes to STDOUT if no outfile
76is specified.
77
78=item podroot
79
80 --podroot=name
81
82Specify the base directory for finding library pods.
83
84=item podpath
85
86 --podpath=name:...:name
87
88Specify which subdirectories of the podroot contain pod files whose
89HTML converted forms can be linked-to in cross-references.
90
91=item libpods
92
93 --libpods=name:...:name
94
95List of page names (eg, "perlfunc") which contain linkable C<=item>s.
96
97=item netscape
98
99 --netscape
100
101Use Netscape HTML directives when applicable.
102
103=item nonetscape
104
105 --nonetscape
106
107Do not use Netscape HTML directives (default).
108
109=item index
110
111 --index
112
113Generate an index at the top of the HTML file (default behaviour).
114
115=item noindex
116
117 --noindex
118
119Do not generate an index at the top of the HTML file.
120
121
122=item recurse
123
124 --recurse
125
126Recurse into subdirectories specified in podpath (default behaviour).
127
128=item norecurse
129
130 --norecurse
131
132Do not recurse into subdirectories specified in podpath.
133
134=item title
135
136 --title=title
137
138Specify the title of the resulting HTML file.
139
34db337b 140=item css
141
142 --css=stylesheet
143
144Specify the URL of a cascading style sheet.
145
54310121 146=item verbose
147
148 --verbose
149
150Display progress messages.
151
34db337b 152=item quiet
153
154 --quiet
155
156Don't display I<mostly harmless> warning messages.
157
54310121 158=back
159
160=head1 EXAMPLE
161
162 pod2html("pod2html",
163 "--podpath=lib:ext:pod:vms",
164 "--podroot=/usr/src/perl",
165 "--htmlroot=/perl/nmanual",
166 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
167 "--recurse",
168 "--infile=foo.pod",
169 "--outfile=/perl/nmanual/foo.html");
170
34db337b 171=head1 ENVIRONMENT
172
173Uses $Config{pod2html} to setup default options.
174
54310121 175=head1 AUTHOR
176
177Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
178
54310121 179=head1 SEE ALSO
180
181L<perlpod>
182
183=head1 COPYRIGHT
184
185This program is distributed under the Artistic License.
186
187=cut
188
1f763251 189my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
190my $dircache = "pod2htmd$cache_ext";
191my $itemcache = "pod2htmi$cache_ext";
54310121 192
193my @begin_stack = (); # begin/end stack
194
29f227c9 195my @libpods = (); # files to search for links from C<> directives
196my $htmlroot = "/"; # http-server base directory from which all
54310121 197 # relative paths in $podpath stem.
5a039dd3 198my $htmldir = ""; # The directory to which the html pages
199 # will (eventually) be written.
54310121 200my $htmlfile = ""; # write to stdout by default
29f227c9 201my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3 202 # refer to this file. This is only used
203 # to make relative urls that point to
204 # other files.
54310121 205my $podfile = ""; # read from stdin by default
206my @podpath = (); # list of directories containing library pods.
207my $podroot = "."; # filesystem base directory from which all
208 # relative paths in $podpath stem.
34db337b 209my $css = ''; # Cascading style sheet
54310121 210my $recurse = 1; # recurse on subdirectories in $podpath.
34db337b 211my $quiet = 0; # not quiet by default
54310121 212my $verbose = 0; # not verbose by default
213my $doindex = 1; # non-zero if we should generate an index
214my $listlevel = 0; # current list depth
2a28b791 215my @listend = (); # the text to use to end the list.
216my $after_lpar = 0; # set to true after a par in an =item
54310121 217my $ignore = 1; # whether or not to format text. we don't
218 # format text until we hit our first pod
219 # directive.
220
221my %items_named = (); # for the multiples of the same item in perlfunc
222my @items_seen = ();
223my $netscape = 0; # whether or not to use netscape directives.
224my $title; # title to give the pod(s)
34db337b 225my $header = 0; # produce block header/footer
54310121 226my $top = 1; # true if we are at the top of the doc. used
227 # to prevent the first <HR> directive.
228my $paragraph; # which paragraph we're processing (used
229 # for error messages)
2a28b791 230my $ptQuote = 0; # status of double-quote conversion
54310121 231my %pages = (); # associative array used to find the location
232 # of pages referenced by L<> links.
233my %sections = (); # sections within this page
234my %items = (); # associative array used to find the location
235 # of =item directives referenced by C<> links
2a28b791 236my %local_items = (); # local items - avoid destruction of %items
39e571d4 237my $Is83; # is dos with short filenames (8.3)
238
54310121 239sub init_globals {
04f720db 240$dircache = "pod2htmd$cache_ext";
241$itemcache = "pod2htmi$cache_ext";
54310121 242
243@begin_stack = (); # begin/end stack
244
245@libpods = (); # files to search for links from C<> directives
246$htmlroot = "/"; # http-server base directory from which all
247 # relative paths in $podpath stem.
248$htmlfile = ""; # write to stdout by default
249$podfile = ""; # read from stdin by default
250@podpath = (); # list of directories containing library pods.
251$podroot = "."; # filesystem base directory from which all
252 # relative paths in $podpath stem.
34db337b 253$css = ''; # Cascading style sheet
54310121 254$recurse = 1; # recurse on subdirectories in $podpath.
34db337b 255$quiet = 0; # not quiet by default
54310121 256$verbose = 0; # not verbose by default
257$doindex = 1; # non-zero if we should generate an index
258$listlevel = 0; # current list depth
2a28b791 259@listend = (); # the text to use to end the list.
260$after_lpar = 0; # set to true after a par in an =item
54310121 261$ignore = 1; # whether or not to format text. we don't
262 # format text until we hit our first pod
263 # directive.
264
265@items_seen = ();
266%items_named = ();
267$netscape = 0; # whether or not to use netscape directives.
34db337b 268$header = 0; # produce block header/footer
54310121 269$title = ''; # title to give the pod(s)
270$top = 1; # true if we are at the top of the doc. used
271 # to prevent the first <HR> directive.
272$paragraph = ''; # which paragraph we're processing (used
273 # for error messages)
54310121 274%sections = (); # sections within this page
3e3baf6d 275
276# These are not reinitialised here but are kept as a cache.
277# See get_cache and related cache management code.
278#%pages = (); # associative array used to find the location
279 # of pages referenced by L<> links.
280#%items = (); # associative array used to find the location
54310121 281 # of =item directives referenced by C<> links
2a28b791 282%local_items = ();
39e571d4 283$Is83=$^O eq 'dos';
54310121 284}
285
2a28b791 286#
287# clean_data: global clean-up of pod data
288#
289sub clean_data($){
290 my( $dataref ) = @_;
291 my $i;
292 for( $i = 0; $i <= $#$dataref; $i++ ){
293 ${$dataref}[$i] =~ s/\s+\Z//;
294
295 # have a look for all-space lines
296 if( ${$dataref}[$i] =~ /^\s+$/m ){
297 my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
298 splice( @$dataref, $i, 1, @chunks );
299 }
300 }
301}
302
303
54310121 304sub pod2html {
305 local(@ARGV) = @_;
306 local($/);
307 local $_;
308
309 init_globals();
310
39e571d4 311 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
312
54310121 313 # cache of %pages and %items from last time we ran pod2html
54310121 314
315 #undef $opt_help if defined $opt_help;
316
317 # parse the command-line parameters
318 parse_command_line();
319
320 # set some variables to their default values if necessary
321 local *POD;
322 unless (@ARGV && $ARGV[0]) {
323 $podfile = "-" unless $podfile; # stdin
324 open(POD, "<$podfile")
325 || die "$0: cannot open $podfile file for input: $!\n";
326 } else {
327 $podfile = $ARGV[0]; # XXX: might be more filenames
328 *POD = *ARGV;
329 }
330 $htmlfile = "-" unless $htmlfile; # stdout
331 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
29f227c9 332 $htmldir =~ s#/$## ; # so we don't get a //
333 if ( $htmlroot eq ''
334 && defined( $htmldir )
335 && $htmldir ne ''
336 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
337 )
5a039dd3 338 {
29f227c9 339 # Set the 'base' url for this file, so that we can use it
340 # as the location from which to calculate relative links
341 # to other files. If this is '', then absolute links will
342 # be used throughout.
343 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 344 }
54310121 345
346 # read the pod a paragraph at a time
347 warn "Scanning for sections in input file(s)\n" if $verbose;
348 $/ = "";
349 my @poddata = <POD>;
350 close(POD);
2a28b791 351 clean_data( \@poddata );
54310121 352
353 # scan the pod for =head[1-6] directives and build an index
354 my $index = scan_headings(\%sections, @poddata);
355
3e3baf6d 356 unless($index) {
31e56455 357 warn "No headings in $podfile\n" if $verbose;
3e3baf6d 358 }
359
54310121 360 # open the output file
361 open(HTML, ">$htmlfile")
362 || die "$0: cannot open $htmlfile file for output: $!\n";
363
d011ffae 364 # put a title in the HTML file if one wasn't specified
365 if ($title eq '') {
366 TITLE_SEARCH: {
367 for (my $i = 0; $i < @poddata; $i++) {
368 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
369 for my $para ( @poddata[$i, $i+1] ) {
370 last TITLE_SEARCH
371 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
372 }
373 }
54310121 374
d011ffae 375 }
376 }
377 }
3e3baf6d 378 if (!$title and $podfile =~ /\.pod$/) {
379 # probably a split pod so take first =head[12] as title
380 for (my $i = 0; $i < @poddata; $i++) {
381 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
382 }
383 warn "adopted '$title' as title for $podfile\n"
384 if $verbose and $title;
385 }
7b8d334a 386 if ($title) {
387 $title =~ s/\s*\(.*\)//;
388 } else {
34db337b 389 warn "$0: no title for $podfile" unless $quiet;
54310121 390 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
391 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 392 warn "using $title" if $verbose;
54310121 393 }
34db337b 394 my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
395 $csslink =~ s,\\,/,g;
396 $csslink =~ s,(/.):,$1|,;
397
398 my $block = $header ? <<END_OF_BLOCK : '';
399<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
400<TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
401<FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
402</TD></TR>
403</TABLE>
404END_OF_BLOCK
405
54310121 406 print HTML <<END_OF_HEAD;
7b8d334a 407<HTML>
408<HEAD>
34db337b 409<TITLE>$title</TITLE>$csslink
7b8d334a 410<LINK REV="made" HREF="mailto:$Config{perladmin}">
411</HEAD>
54310121 412
7b8d334a 413<BODY>
34db337b 414$block
54310121 415END_OF_HEAD
416
3e3baf6d 417 # load/reload/validate/cache %pages and %items
418 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121 419
420 # scan the pod for =item directives
2a28b791 421 scan_items( \%local_items, "", @poddata);
54310121 422
423 # put an index at the top of the file. note, if $doindex is 0 we
424 # still generate an index, but surround it with an html comment.
425 # that way some other program can extract it if desired.
426 $index =~ s/--+/-/g;
2a28b791 427 print HTML "<A NAME=\"__index__\"></A>\n";
54310121 428 print HTML "<!-- INDEX BEGIN -->\n";
429 print HTML "<!--\n" unless $doindex;
430 print HTML $index;
431 print HTML "-->\n" unless $doindex;
432 print HTML "<!-- INDEX END -->\n\n";
31e56455 433 print HTML "<HR>\n" if $doindex and $index;
54310121 434
435 # now convert this file
2a28b791 436 my $after_item; # set to true after an =item
437 warn "Converting input file $podfile\n" if $verbose;
438 foreach my $i (0..$#poddata){
439 $ptQuote = 0; # status of quote conversion
440
54310121 441 $_ = $poddata[$i];
442 $paragraph = $i+1;
443 if (/^(=.*)/s) { # is it a pod directive?
444 $ignore = 0;
2a28b791 445 $after_item = 0;
54310121 446 $_ = $1;
447 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
448 process_begin($1, $2);
449 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
450 process_end($1, $2);
451 } elsif (/^=cut/) { # =cut
452 process_cut();
453 } elsif (/^=pod/) { # =pod
454 process_pod();
455 } else {
456 next if @begin_stack && $begin_stack[-1] ne 'html';
457
7b8d334a 458 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
2a28b791 459 process_head( $1, $2, $doindex && $index );
460 } elsif (/^=item\s*(.*\S)?/sm) { # =item text
461 warn "$0: $podfile: =item without bullet, number or text"
462 . " in paragraph $paragraph.\n" if $1 eq '';
463 process_item( $1 );
464 $after_item = 1;
54310121 465 } elsif (/^=over\s*(.*)/) { # =over N
466 process_over();
467 } elsif (/^=back/) { # =back
468 process_back();
469 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
470 process_for($1,$2);
471 } else {
472 /^=(\S*)\s*/;
473 warn "$0: $podfile: unknown pod directive '$1' in "
474 . "paragraph $paragraph. ignoring.\n";
475 }
476 }
477 $top = 0;
478 }
479 else {
480 next if $ignore;
481 next if @begin_stack && $begin_stack[-1] ne 'html';
482 my $text = $_;
2a28b791 483 if( $text =~ /\A\s+/ ){
484 process_pre( \$text );
485 print HTML "<PRE>\n$text</PRE>\n";
486
487 } else {
488 process_text( \$text );
489
490 # experimental: check for a paragraph where all lines
491 # have some ...\t...\t...\n pattern
492 if( $text =~ /\t/ ){
493 my @lines = split( "\n", $text );
494 if( @lines > 1 ){
495 my $all = 2;
496 foreach my $line ( @lines ){
497 if( $line =~ /\S/ && $line !~ /\t/ ){
498 $all--;
499 last if $all == 0;
500 }
501 }
502 if( $all > 0 ){
503 $text =~ s/\t+/<TD>/g;
504 $text =~ s/^/<TR><TD>/gm;
505 $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
506 $text . '</TABLE>';
507 }
508 }
509 }
510 ## end of experimental
511
512 if( $after_item ){
513 print HTML "$text\n";
514 $after_lpar = 1;
515 } else {
516 print HTML "<P>$text</P>\n";
517 }
518 }
519 $after_item = 0;
54310121 520 }
521 }
522
523 # finish off any pending directives
524 finish_list();
2a28b791 525
526 # link to page index
527 print HTML "<P><A HREF=\"#__index__\"><SMALL>page index</SMALL></A></P>\n"
528 if $doindex and $index;
529
54310121 530 print HTML <<END_OF_TAIL;
34db337b 531$block
7b8d334a 532</BODY>
54310121 533
7b8d334a 534</HTML>
54310121 535END_OF_TAIL
536
537 # close the html file
538 close(HTML);
539
540 warn "Finished\n" if $verbose;
541}
542
543##############################################################################
544
545my $usage; # see below
546sub usage {
547 my $podfile = shift;
548 warn "$0: $podfile: @_\n" if @_;
549 die $usage;
550}
551
552$usage =<<END_OF_USAGE;
553Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
554 --podpath=<name>:...:<name> --podroot=<name>
555 --libpods=<name>:...:<name> --recurse --verbose --index
556 --netscape --norecurse --noindex
557
558 --flush - flushes the item and directory caches.
559 --help - prints this message.
560 --htmlroot - http-server base directory from which all relative paths
561 in podpath stem (default is /).
562 --index - generate an index at the top of the resulting html
563 (default).
564 --infile - filename for the pod to convert (input taken from stdin
565 by default).
566 --libpods - colon-separated list of pages to search for =item pod
567 directives in as targets of C<> and implicit links (empty
568 by default). note, these are not filenames, but rather
569 page names like those that appear in L<> links.
570 --netscape - will use netscape html directives when applicable.
571 --nonetscape - will not use netscape directives (default).
572 --outfile - filename for the resulting html file (output sent to
573 stdout by default).
574 --podpath - colon-separated list of directories containing library
575 pods. empty by default.
576 --podroot - filesystem base directory from which all relative paths
577 in podpath stem (default is .).
578 --noindex - don't generate an index at the top of the resulting html.
579 --norecurse - don't recurse on those subdirectories listed in podpath.
580 --recurse - recurse on those subdirectories listed in podpath
581 (default behavior).
582 --title - title that will appear in resulting html file.
34db337b 583 --header - produce block header/footer
584 --css - stylesheet URL
54310121 585 --verbose - self-explanatory
34db337b 586 --quiet - supress some benign warning messages
54310121 587
588END_OF_USAGE
589
590sub parse_command_line {
2a28b791 591 my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
34db337b 592 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 593 my $result = GetOptions(
29f227c9 594 'flush' => \$opt_flush,
595 'help' => \$opt_help,
596 'htmldir=s' => \$opt_htmldir,
54310121 597 'htmlroot=s' => \$opt_htmlroot,
29f227c9 598 'index!' => \$opt_index,
54310121 599 'infile=s' => \$opt_infile,
600 'libpods=s' => \$opt_libpods,
601 'netscape!' => \$opt_netscape,
602 'outfile=s' => \$opt_outfile,
603 'podpath=s' => \$opt_podpath,
604 'podroot=s' => \$opt_podroot,
54310121 605 'recurse!' => \$opt_recurse,
606 'title=s' => \$opt_title,
34db337b 607 'header' => \$opt_header,
608 'css=s' => \$opt_css,
54310121 609 'verbose' => \$opt_verbose,
34db337b 610 'quiet' => \$opt_quiet,
54310121 611 );
612 usage("-", "invalid parameters") if not $result;
613
614 usage("-") if defined $opt_help; # see if the user asked for help
615 $opt_help = ""; # just to make -w shut-up.
616
617 $podfile = $opt_infile if defined $opt_infile;
618 $htmlfile = $opt_outfile if defined $opt_outfile;
5a039dd3 619 $htmldir = $opt_htmldir if defined $opt_outfile;
54310121 620
621 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
622 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
623
624 warn "Flushing item and directory caches\n"
625 if $opt_verbose && defined $opt_flush;
626 unlink($dircache, $itemcache) if defined $opt_flush;
627
628 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
629 $podroot = $opt_podroot if defined $opt_podroot;
630
631 $doindex = $opt_index if defined $opt_index;
632 $recurse = $opt_recurse if defined $opt_recurse;
633 $title = $opt_title if defined $opt_title;
34db337b 634 $header = defined $opt_header ? 1 : 0;
635 $css = $opt_css if defined $opt_css;
54310121 636 $verbose = defined $opt_verbose ? 1 : 0;
34db337b 637 $quiet = defined $opt_quiet ? 1 : 0;
54310121 638 $netscape = $opt_netscape if defined $opt_netscape;
639}
640
3e3baf6d 641
642my $saved_cache_key;
643
644sub get_cache {
645 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
646 my @cache_key_args = @_;
647
648 # A first-level cache:
649 # Don't bother reading the cache files if they still apply
650 # and haven't changed since we last read them.
651
652 my $this_cache_key = cache_key(@cache_key_args);
653
654 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
655
656 # load the cache of %pages and %items if possible. $tests will be
657 # non-zero if successful.
658 my $tests = 0;
659 if (-f $dircache && -f $itemcache) {
660 warn "scanning for item cache\n" if $verbose;
661 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
662 }
663
664 # if we didn't succeed in loading the cache then we must (re)build
665 # %pages and %items.
666 if (!$tests) {
667 warn "scanning directories in pod-path\n" if $verbose;
668 scan_podpath($podroot, $recurse, 0);
669 }
670 $saved_cache_key = cache_key(@cache_key_args);
671}
672
673sub cache_key {
674 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
675 return join('!', $dircache, $itemcache, $recurse,
29f227c9 676 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d 677}
678
54310121 679#
3e3baf6d 680# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121 681# are valid caches of %pages and %items. if they are valid then it loads
682# them and returns a non-zero value.
683#
3e3baf6d 684sub load_cache {
54310121 685 my($dircache, $itemcache, $podpath, $podroot) = @_;
686 my($tests);
687 local $_;
688
689 $tests = 0;
690
691 open(CACHE, "<$itemcache") ||
692 die "$0: error opening $itemcache for reading: $!\n";
693 $/ = "\n";
694
695 # is it the same podpath?
696 $_ = <CACHE>;
697 chomp($_);
3e3baf6d 698 $tests++ if (join(":", @$podpath) eq $_);
54310121 699
700 # is it the same podroot?
701 $_ = <CACHE>;
702 chomp($_);
703 $tests++ if ($podroot eq $_);
704
705 # load the cache if its good
706 if ($tests != 2) {
707 close(CACHE);
54310121 708 return 0;
709 }
710
711 warn "loading item cache\n" if $verbose;
712 while (<CACHE>) {
713 /(.*?) (.*)$/;
714 $items{$1} = $2;
715 }
716 close(CACHE);
717
718 warn "scanning for directory cache\n" if $verbose;
719 open(CACHE, "<$dircache") ||
720 die "$0: error opening $dircache for reading: $!\n";
721 $/ = "\n";
722 $tests = 0;
723
724 # is it the same podpath?
725 $_ = <CACHE>;
726 chomp($_);
3e3baf6d 727 $tests++ if (join(":", @$podpath) eq $_);
54310121 728
729 # is it the same podroot?
730 $_ = <CACHE>;
731 chomp($_);
732 $tests++ if ($podroot eq $_);
733
734 # load the cache if its good
735 if ($tests != 2) {
736 close(CACHE);
54310121 737 return 0;
738 }
739
740 warn "loading directory cache\n" if $verbose;
741 while (<CACHE>) {
742 /(.*?) (.*)$/;
743 $pages{$1} = $2;
744 }
745
746 close(CACHE);
747
748 return 1;
749}
750
751#
752# scan_podpath - scans the directories specified in @podpath for directories,
753# .pod files, and .pm files. it also scans the pod files specified in
754# @libpods for =item directives.
755#
756sub scan_podpath {
3e3baf6d 757 my($podroot, $recurse, $append) = @_;
54310121 758 my($pwd, $dir);
759 my($libpod, $dirname, $pod, @files, @poddata);
760
3e3baf6d 761 unless($append) {
762 %items = ();
763 %pages = ();
764 }
765
54310121 766 # scan each directory listed in @podpath
767 $pwd = getcwd();
768 chdir($podroot)
769 || die "$0: error changing to directory $podroot: $!\n";
770 foreach $dir (@podpath) {
771 scan_dir($dir, $recurse);
772 }
773
774 # scan the pods listed in @libpods for =item directives
775 foreach $libpod (@libpods) {
776 # if the page isn't defined then we won't know where to find it
777 # on the system.
778 next unless defined $pages{$libpod} && $pages{$libpod};
779
780 # if there is a directory then use the .pod and .pm files within it.
29f227c9 781 # NOTE: Only finds the first so-named directory in the tree.
782# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
783 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 784 # find all the .pod and .pm files within the directory
785 $dirname = $1;
786 opendir(DIR, $dirname) ||
787 die "$0: error opening directory $dirname: $!\n";
788 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
789 closedir(DIR);
790
791 # scan each .pod and .pm file for =item directives
792 foreach $pod (@files) {
793 open(POD, "<$dirname/$pod") ||
794 die "$0: error opening $dirname/$pod for input: $!\n";
795 @poddata = <POD>;
796 close(POD);
2a28b791 797 clean_data( \@poddata );
54310121 798
2a28b791 799 scan_items( \%items, "$dirname/$pod", @poddata);
54310121 800 }
801
802 # use the names of files as =item directives too.
2a28b791 803### Don't think this should be done this way - confuses issues.(WL)
804### foreach $pod (@files) {
805### $pod =~ /^(.*)(\.pod|\.pm)$/;
806### $items{$1} = "$dirname/$1.html" if $1;
807### }
54310121 808 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
809 $pages{$libpod} =~ /([^:]*\.pm):/) {
810 # scan the .pod or .pm file for =item directives
811 $pod = $1;
812 open(POD, "<$pod") ||
813 die "$0: error opening $pod for input: $!\n";
814 @poddata = <POD>;
815 close(POD);
2a28b791 816 clean_data( \@poddata );
54310121 817
2a28b791 818 scan_items( \%items, "$pod", @poddata);
54310121 819 } else {
820 warn "$0: shouldn't be here (line ".__LINE__."\n";
821 }
822 }
823 @poddata = (); # clean-up a bit
824
825 chdir($pwd)
826 || die "$0: error changing to directory $pwd: $!\n";
827
828 # cache the item list for later use
829 warn "caching items for later use\n" if $verbose;
830 open(CACHE, ">$itemcache") ||
831 die "$0: error open $itemcache for writing: $!\n";
832
833 print CACHE join(":", @podpath) . "\n$podroot\n";
834 foreach my $key (keys %items) {
835 print CACHE "$key $items{$key}\n";
836 }
837
838 close(CACHE);
839
840 # cache the directory list for later use
841 warn "caching directories for later use\n" if $verbose;
842 open(CACHE, ">$dircache") ||
843 die "$0: error open $dircache for writing: $!\n";
844
845 print CACHE join(":", @podpath) . "\n$podroot\n";
846 foreach my $key (keys %pages) {
847 print CACHE "$key $pages{$key}\n";
848 }
849
850 close(CACHE);
851}
852
853#
854# scan_dir - scans the directory specified in $dir for subdirectories, .pod
855# files, and .pm files. notes those that it finds. this information will
856# be used later in order to figure out where the pages specified in L<>
857# links are on the filesystem.
858#
859sub scan_dir {
860 my($dir, $recurse) = @_;
861 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
862 local $_;
863
864 @subdirs = ();
865 @pods = ();
866
867 opendir(DIR, $dir) ||
868 die "$0: error opening directory $dir: $!\n";
869 while (defined($_ = readdir(DIR))) {
870 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
871 $pages{$_} = "" unless defined $pages{$_};
872 $pages{$_} .= "$dir/$_:";
873 push(@subdirs, $_);
874 } elsif (/\.pod$/) { # .pod
875 s/\.pod$//;
876 $pages{$_} = "" unless defined $pages{$_};
877 $pages{$_} .= "$dir/$_.pod:";
878 push(@pods, "$dir/$_.pod");
879 } elsif (/\.pm$/) { # .pm
880 s/\.pm$//;
881 $pages{$_} = "" unless defined $pages{$_};
882 $pages{$_} .= "$dir/$_.pm:";
883 push(@pods, "$dir/$_.pm");
884 }
885 }
886 closedir(DIR);
887
888 # recurse on the subdirectories if necessary
889 if ($recurse) {
890 foreach my $subdir (@subdirs) {
891 scan_dir("$dir/$subdir", $recurse);
892 }
893 }
894}
895
896#
897# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
898# build an index.
899#
900sub scan_headings {
901 my($sections, @data) = @_;
2a28b791 902 my($tag, $which_head, $otitle, $listdepth, $index);
54310121 903
be173d55 904 # here we need local $ignore = 0;
905 # unfortunately, we can't have it, because $ignore is lexical
906 $ignore = 0;
907
54310121 908 $listdepth = 0;
909 $index = "";
910
911 # scan for =head directives, note their name, and build an index
912 # pointing to each of them.
913 foreach my $line (@data) {
bb9460ed 914 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
2a28b791 915 ($tag, $which_head, $otitle) = ($1,$2,$3);
916
917 my $title = depod( $otitle );
918 my $name = htmlify( $title );
919 $$sections{$name} = 1;
920 $title = process_text( \$otitle );
54310121 921
102c538a 922 while ($which_head != $listdepth) {
923 if ($which_head > $listdepth) {
924 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
925 $listdepth++;
926 } elsif ($which_head < $listdepth) {
927 $listdepth--;
928 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
929 }
54310121 930 }
54310121 931
932 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
2a28b791 933 "<A HREF=\"#" . $name . "\">" .
934 $title . "</A></LI>";
54310121 935 }
936 }
937
938 # finish off the lists
939 while ($listdepth--) {
940 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
941 }
942
943 # get rid of bogus lists
944 $index =~ s,\t*<UL>\s*</UL>\n,,g;
945
bb9460ed 946 $ignore = 1; # restore old value;
be173d55 947
54310121 948 return $index;
949}
950
951#
952# scan_items - scans the pod specified by $pod for =item directives. we
953# will use this information later on in resolving C<> links.
954#
955sub scan_items {
2a28b791 956 my( $itemref, $pod, @poddata ) = @_;
54310121 957 my($i, $item);
958 local $_;
959
960 $pod =~ s/\.pod$//;
961 $pod .= ".html" if $pod;
962
963 foreach $i (0..$#poddata) {
2a28b791 964 my $txt = depod( $poddata[$i] );
965
966 # figure out what kind of item it is.
967 # Build string for referencing this item.
968 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
969 next unless $1;
970 $item = $1;
971 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
972 $item = $1;
973 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
974 $item = $1;
975 } else {
976 next;
54310121 977 }
2a28b791 978 my $fid = fragment_id( $item );
979 $$itemref{$fid} = "$pod" if $fid;
54310121 980 }
981}
982
983#
984# process_head - convert a pod head[1-6] tag and convert it to HTML format.
985#
986sub process_head {
2a28b791 987 my($tag, $heading, $hasindex) = @_;
54310121 988
989 # figure out the level of the =head
990 $tag =~ /head([1-6])/;
991 my $level = $1;
992
2a28b791 993 if( $listlevel ){
994 warn "$0: $podfile: unterminated list at =head in paragraph $paragraph. ignoring.\n";
995 while( $listlevel ){
996 process_back();
997 }
998 }
999
1000 print HTML "<P>\n";
1001 if( $level == 1 && ! $top ){
1002 print HTML "<A HREF=\"#__index__\"><SMALL>page index</SMALL></A>\n"
1003 if $hasindex;
1004 print HTML "<HR>\n"
1005 }
1006
1007 my $name = htmlify( depod( $heading ) );
1008 my $convert = process_text( \$heading );
1009 print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
54310121 1010}
1011
2a28b791 1012
54310121 1013#
2a28b791 1014# emit_item_tag - print an =item's text
1015# Note: The global $EmittedItem is used for inhibiting self-references.
54310121 1016#
2a28b791 1017my $EmittedItem;
1018
1019sub emit_item_tag($$$){
1020 my( $otext, $text, $compact ) = @_;
1021 my $item = fragment_id( $text );
54310121 1022
2a28b791 1023 $EmittedItem = $item;
1024 ### print STDERR "emit_item_tag=$item ($text)\n";
54310121 1025
2a28b791 1026 print HTML '<STRONG>';
1027 if ($items_named{$item}++) {
1028 print HTML process_text( \$otext );
1029 } else {
1030 my $name = 'item_' . $item;
1031 print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';
1032 }
1033 print HTML "</STRONG><BR>\n";
1034 undef( $EmittedItem );
1035}
1036
1037sub emit_li {
1038 my( $tag ) = @_;
1039 if( $items_seen[$listlevel]++ == 0 ){
1040 push( @listend, "</$tag>" );
1041 print HTML "<$tag>\n";
1042 }
1043 print HTML $tag eq 'DL' ? '<DT>' : '<LI>';
1044}
1045
1046#
1047# process_item - convert a pod item tag and convert it to HTML format.
1048#
1049sub process_item {
1050 my( $otext ) = @_;
54310121 1051
1052 # lots of documents start a list without doing an =over. this is
1053 # bad! but, the proper thing to do seems to be to just assume
1054 # they did do an =over. so warn them once and then continue.
2a28b791 1055 if( $listlevel == 0 ){
1056 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n";
1057 process_over();
1058 }
54310121 1059
2a28b791 1060 # formatting: insert a paragraph if preceding item has >1 paragraph
1061 if( $after_lpar ){
1062 print HTML "<P></P>\n";
1063 $after_lpar = 0;
1064 }
54310121 1065
1066 # remove formatting instructions from the text
2a28b791 1067 my $text = depod( $otext );
1068
1069 # all the list variants:
1070 if( $text =~ /\A\*/ ){ # bullet
1071 emit_li( 'UL' );
1072 if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1073 my $tag = $1;
1074 $otext =~ s/\A\*\s+//;
1075 emit_item_tag( $otext, $tag, 1 );
54310121 1076 }
1077
2a28b791 1078 } elsif( $text =~ /\A\d+/ ){ # numbered list
1079 emit_li( 'OL' );
1080 if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1081 my $tag = $1;
1082 $otext =~ s/\A\d+\.?\s*//;
1083 emit_item_tag( $otext, $tag, 1 );
7b8d334a 1084 }
54310121 1085
2a28b791 1086 } else { # definition list
1087 emit_li( 'DL' );
1088 if ($text =~ /\A(.+)\Z/s ){ # should have text
1089 emit_item_tag( $otext, $text, 1 );
7b8d334a 1090 }
54310121 1091 print HTML '<DD>';
1092 }
54310121 1093 print HTML "\n";
1094}
1095
1096#
2a28b791 1097# process_over - process a pod over tag and start a corresponding HTML list.
54310121 1098#
1099sub process_over {
1100 # start a new list
1101 $listlevel++;
2a28b791 1102 push( @items_seen, 0 );
1103 $after_lpar = 0;
54310121 1104}
1105
1106#
1107# process_back - process a pod back tag and convert it to HTML format.
1108#
1109sub process_back {
2a28b791 1110 if( $listlevel == 0 ){
1111 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n";
1112 return;
1113 }
54310121 1114
1115 # close off the list. note, I check to see if $listend[$listlevel] is
1116 # defined because an =item directive may have never appeared and thus
1117 # $listend[$listlevel] may have never been initialized.
1118 $listlevel--;
2a28b791 1119 if( defined $listend[$listlevel] ){
1120 print HTML '<P></P>' if $after_lpar;
1121 print HTML $listend[$listlevel];
1122 print HTML "\n";
1123 pop( @listend );
1124 }
1125 $after_lpar = 0;
54310121 1126
2a28b791 1127 # clean up item count
1128 pop( @items_seen );
54310121 1129}
1130
1131#
2a28b791 1132# process_cut - process a pod cut tag, thus start ignoring pod directives.
54310121 1133#
1134sub process_cut {
1135 $ignore = 1;
1136}
1137
1138#
2a28b791 1139# process_pod - process a pod pod tag, thus stop ignoring pod directives
1140# until we see a corresponding cut.
54310121 1141#
1142sub process_pod {
1143 # no need to set $ignore to 0 cause the main loop did it
1144}
1145
1146#
2a28b791 1147# process_for - process a =for pod tag. if it's for html, spit
c4d9b39d 1148# it out verbatim, if illustration, center it, otherwise ignore it.
54310121 1149#
1150sub process_for {
1151 my($whom, $text) = @_;
1152 if ( $whom =~ /^(pod2)?html$/i) {
1153 print HTML $text;
c4d9b39d 1154 } elsif ($whom =~ /^illustration$/i) {
1155 1 while chomp $text;
1156 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1157 $text .= $ext, last if -r "$text$ext";
1158 }
1159 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1160 }
54310121 1161}
1162
1163#
1164# process_begin - process a =begin pod tag. this pushes
1165# whom we're beginning on the begin stack. if there's a
1166# begin stack, we only print if it us.
1167#
1168sub process_begin {
1169 my($whom, $text) = @_;
1170 $whom = lc($whom);
1171 push (@begin_stack, $whom);
1172 if ( $whom =~ /^(pod2)?html$/) {
1173 print HTML $text if $text;
1174 }
1175}
1176
1177#
1178# process_end - process a =end pod tag. pop the
1179# begin stack. die if we're mismatched.
1180#
1181sub process_end {
1182 my($whom, $text) = @_;
1183 $whom = lc($whom);
1184 if ($begin_stack[-1] ne $whom ) {
1185 die "Unmatched begin/end at chunk $paragraph\n"
1186 }
2a28b791 1187 pop( @begin_stack );
54310121 1188}
1189
1190#
2a28b791 1191# process_pre - indented paragraph, made into <PRE></PRE>
54310121 1192#
2a28b791 1193sub process_pre {
1194 my( $text ) = @_;
1195 my( $rest );
54310121 1196 return if $ignore;
1197
54310121 1198 $rest = $$text;
1199
2a28b791 1200 # insert spaces in place of tabs
1201 $rest =~ s#.*#
be173d55 1202 my $line = $&;
1203 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1204 $line;
1205 #eg;
54310121 1206
2a28b791 1207 # convert some special chars to HTML escapes
1208 $rest =~ s/&/&amp;/g;
1209 $rest =~ s/</&lt;/g;
1210 $rest =~ s/>/&gt;/g;
1211 $rest =~ s/"/&quot;/g;
1212
1213 # try and create links for all occurrences of perl.* within
1214 # the preformatted text.
1215 $rest =~ s{
1216 (\s*)(perl\w+)
1217 }{
1218 if ( defined $pages{$2} ){ # is a link
1219 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1220 } elsif (defined $pages{dosify($2)}) { # is a link
1221 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1222 } else {
1223 "$1$2";
1224 }
1225 }xeg;
1226 $rest =~ s{
1227 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1228 }{
1229 my $url ;
1230 if ( $htmlfileurl ne '' ){
1231 # Here, we take advantage of the knowledge
1232 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1233 # Since $htmlroot eq '', we need to prepend $htmldir
1234 # on the fron of the link to get the absolute path
1235 # of the link's target. We check for a leading '/'
1236 # to avoid corrupting links that are #, file:, etc.
1237 my $old_url = $3 ;
1238 $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1239 $url = relativize_url( "$old_url.html", $htmlfileurl );
1240 } else {
1241 $url = "$3.html" ;
1242 }
1243 "$1$url" ;
1244 }xeg;
1245
1246 # Look for embedded URLs and make them into links. We don't
1247 # relativize them since they are best left as the author intended.
1248
1249 my $urls = '(' . join ('|', qw{
54310121 1250 http
1251 telnet
1252 mailto
1253 news
1254 gopher
1255 file
1256 wais
1257 ftp
1258 } )
1259 . ')';
1260
2a28b791 1261 my $ltrs = '\w';
1262 my $gunk = '/#~:.?+=&%@!\-';
1263 my $punc = '.:?\-';
1264 my $any = "${ltrs}${gunk}${punc}";
54310121 1265
2a28b791 1266 $rest =~ s{
54310121 1267 \b # start at word boundary
1268 ( # begin $1 {
29f227c9 1269 $urls : # need resource and a colon
1270 (?!:) # Ignore File::, among others.
54310121 1271 [$any] +? # followed by on or more
1272 # of any valid character, but
1273 # be conservative and take only
1274 # what you need to....
1275 ) # end $1 }
1276 (?= # look-ahead non-consumptive assertion
1277 [$punc]* # either 0 or more puntuation
1278 [^$any] # followed by a non-url char
1279 | # or else
1280 $ # then end of the string
1281 )
1282 }{<A HREF="$1">$1</A>}igox;
1283
2a28b791 1284 # text should be as it is (verbatim)
1285 $$text = $rest;
1286}
54310121 1287
54310121 1288
2a28b791 1289#
1290# pure text processing
1291#
1292# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1293# we don't want this to happen within IS
1294#
1295sub pure_text($){
1296 my $text = shift();
1297 process_puretext( $text, \$ptQuote, 1 );
54310121 1298}
1299
2a28b791 1300sub inIS_text($){
1301 my $text = shift();
1302 process_puretext( $text, \$ptQuote, 0 );
1303}
54310121 1304
1305#
1306# process_puretext - process pure text (without pod-escapes) converting
1307# double-quotes and handling implicit C<> links.
1308#
1309sub process_puretext {
2a28b791 1310 my($text, $quote, $notinIS) = @_;
54310121 1311
2a28b791 1312 ## Guessing at func() or [$@%&]*var references in plain text is destined
1313 ## to produce some strange looking ref's. uncomment to disable:
1314 ## $notinIS = 0;
1315
1316 my(@words, $lead, $trail);
54310121 1317
2a28b791 1318 # convert double-quotes to single-quotes
1319 if( $$quote && $text =~ s/"/''/s ){
1320 $$quote = 0;
1321 }
1322 while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1323 $$quote = 1 if $text =~ s/"/``/s;
54310121 1324
1325 # keep track of leading and trailing white-space
2a28b791 1326 $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
1327 $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
54310121 1328
2a28b791 1329 # split at space/non-space boundaries
1330 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
54310121 1331
1332 # process each word individually
1333 foreach my $word (@words) {
2a28b791 1334 # skip space runs
1335 next if $word =~ /^\s*$/;
54310121 1336 # see if we can infer a link
2a28b791 1337 if( $notinIS && $word =~ s/^(\w+)\((.*)\)\W*$/$1/ ) {
54310121 1338 # has parenthesis so should have been a C<> ref
2a28b791 1339 ## try for a pagename (perlXXX(1))?
1340 if( $2 =~ /^\d+$/ ){
1341 my $url = page_sect( $word, '' );
1342 if( defined $url ){
1343 $word = "<A HREF=\"$url\">the $word manpage</A>";
1344 next;
1345 }
1346 }
1347 $word = emit_C( $word );
1348
1349#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1350## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1351## # perl variables, should be a C<> ref
1352## $word = emit_C( $word );
1353
54310121 1354 } elsif ($word =~ m,^\w+://\w,) {
1355 # looks like a URL
5a039dd3 1356 # Don't relativize it: leave it as the author intended
54310121 1357 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1358 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1359 # looks like an e-mail address
7b8d334a 1360 my ($w1, $w2, $w3) = ("", $word, "");
1361 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1362 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1363 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1364 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1365 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1366 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1367 } else {
7b8d334a 1368 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1369 }
1370 }
1371
2a28b791 1372 # put everything back together
1373 return $lead . join( '', @words ) . $trail;
1374}
1375
54310121 1376
2a28b791 1377#
1378# process_text - handles plaintext that appears in the input pod file.
1379# there may be pod commands embedded within the text so those must be
1380# converted to html commands.
1381#
1382sub process_text {
1383 return if $ignore;
1384 my( $tref ) = @_;
1385 my $res = process_text1( 0, $tref );
1386 $$tref = $res;
1387}
1388
1389sub process_text1($$;$){
1390 my( $lev, $rstr, $func ) = @_;
1391 $lev++ unless defined $func;
1392 my $res = '';
1393
1394 if( $func eq 'B' ){
1395 # B<text> - boldface
1396 $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
1397
1398 } elsif( $func eq 'C' ){
1399 # C<code> - can be a ref or <CODE></CODE>
1400 # need to extract text
1401 my $par = go_ahead( $rstr, 'C' );
1402
1403 ## clean-up of the link target
1404 my $text = depod( $par );
1405
1406 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1407 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1408
1409 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1410
1411 } elsif( $func eq 'E' ){
1412 # E<x> - convert to character
1413 $$rstr =~ s/^(\w+)>//;
1414 $res = "&$1;";
1415
1416 } elsif( $func eq 'F' ){
1417 # F<filename> - italizice
1418 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1419
1420 } elsif( $func eq 'I' ){
1421 # I<text> - italizice
1422 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1423
1424 } elsif( $func eq 'L' ){
1425 # L<link> - link
1426 ## L<text|cross-ref> => produce text, use cross-ref for linking
1427 ## L<cross-ref> => make text from cross-ref
1428 ## need to extract text
1429 my $par = go_ahead( $rstr, 'L' );
1430
1431 # some L<>'s that shouldn't be:
1432 # a) full-blown URL's are emitted as-is
1433 if( $par =~ m{^\w+://}s ){
1434 return make_URL_href( $par );
1435 }
1436 # b) C<...> is stripped and treated as C<>
1437 if( $par =~ /^C<(.*)>$/ ){
1438 my $text = depod( $1 );
1439 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1440 }
1441
1442 # analyze the contents
1443 $par =~ s/\n/ /g; # undo word-wrapped tags
1444 my $opar = $par;
1445 my $linktext;
1446 if( $par =~ s{^([^|]+)\|}{} ){
1447 $linktext = $1;
1448 }
1449
1450 # make sure sections start with a /
1451 $par =~ s{^"}{/"};
1452
1453 my( $page, $section, $ident );
1454
1455 # check for link patterns
1456 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
1457 # we've got a name/ident (no quotes)
1458 ( $page, $ident ) = ( $1, $2 );
1459 ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1460
1461 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1462 # even though this should be a "section", we go for ident first
1463 ( $page, $ident ) = ( $1, $2 );
1464 ### print STDERR "--> L<$par> to page $page, section $section\n";
1465
1466 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
1467 ( $page, $section ) = ( '', $par );
1468 ### print STDERR "--> L<$par> to void page, section $section\n";
1469
1470 } else {
1471 ( $page, $section ) = ( $par, '' );
1472 ### print STDERR "--> L<$par> to page $par, void section\n";
1473 }
1474
1475 # now, either $section or $ident is defined. the convoluted logic
1476 # below tries to resolve L<> according to what the user specified.
1477 # failing this, we try to find the next best thing...
1478 my( $url, $ltext, $fid );
1479
1480 RESOLVE: {
1481 if( defined $ident ){
1482 ## try to resolve $ident as an item
1483 ( $url, $fid ) = coderef( $page, $ident );
1484 if( $url ){
1485 if( ! defined( $linktext ) ){
1486 $linktext = $ident;
1487 $linktext .= " in " if $ident && $page;
1488 $linktext .= "the $page manpage" if $page;
1489 }
1490 ### print STDERR "got coderef url=$url\n";
1491 last RESOLVE;
1492 }
1493 ## no luck: go for a section (auto-quoting!)
1494 $section = $ident;
1495 }
1496 ## now go for a section
1497 my $htmlsection = htmlify( $section );
1498 $url = page_sect( $page, $htmlsection );
1499 if( $url ){
1500 if( ! defined( $linktext ) ){
1501 $linktext = $section;
1502 $linktext .= " in " if $section && $page;
1503 $linktext .= "the $page manpage" if $page;
1504 }
1505 ### print STDERR "got page/section url=$url\n";
1506 last RESOLVE;
1507 }
1508 ## no luck: go for an ident
1509 if( $section ){
1510 $ident = $section;
1511 } else {
1512 $ident = $page;
1513 $page = undef();
1514 }
1515 ( $url, $fid ) = coderef( $page, $ident );
1516 if( $url ){
1517 if( ! defined( $linktext ) ){
1518 $linktext = $ident;
1519 $linktext .= " in " if $ident && $page;
1520 $linktext .= "the $page manpage" if $page;
1521 }
1522 ### print STDERR "got section=>coderef url=$url\n";
1523 last RESOLVE;
1524 }
1525
1526 # warning; show some text.
1527 $linktext = $opar unless defined $linktext;
1528 warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1529 }
1530
1531 # now we have an URL or just plain code
1532 $$rstr = $linktext . '>' . $$rstr;
1533 if( defined( $url ) ){
1534 $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
1535 } else {
1536 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1537 }
1538
1539 } elsif( $func eq 'S' ){
1540 # S<text> - non-breaking spaces
1541 $res = process_text1( $lev, $rstr );
1542 $res =~ s/ /&nbsp;/g;
1543
1544 } elsif( $func eq 'X' ){
1545 # X<> - ignore
1546 $$rstr =~ s/^[^>]*>//;
1547
1548 } elsif( $func eq 'Z' ){
1549 # Z<> - empty
1550 warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1551 unless $$rstr =~ s/^>//;
1552
1553 } else {
1554 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
1555 # all others: either recurse into new function or
1556 # terminate at closing angle bracket
1557 my $pt = $1;
1558 $pt .= '>' if $2 eq '>' && $lev == 1;
1559 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1560 return $res if $2 eq '>' && $lev > 1;
1561 if( $2 ne '>' ){
1562 $res .= process_text1( $lev, $rstr, substr($2,0,1) );
1563 }
1564
1565 }
1566 if( $lev == 1 ){
1567 $res .= pure_text( $$rstr );
54310121 1568 } else {
2a28b791 1569 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
54310121 1570 }
1571 }
2a28b791 1572 return $res;
1573}
54310121 1574
2a28b791 1575#
1576# go_ahead: extract text of an IS (can be nested)
1577#
1578sub go_ahead($$){
1579 my( $rstr, $func ) = @_;
1580 my $res = '';
1581 my $level = 1;
1582 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
1583 $res .= $1;
1584 if( $2 eq '>' ){
1585 return $res if --$level == 0;
1586 } else {
1587 ++$level;
1588 }
1589 $res .= $2;
1590 }
1591 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1592 return $res;
54310121 1593}
1594
1595#
2a28b791 1596# emit_C - output result of C<text>
1597# $text is the depod-ed text
54310121 1598#
2a28b791 1599sub emit_C($;$){
1600 my( $text, $nocode ) = @_;
1601 my $res;
1602 my( $url, $fid ) = coderef( undef(), $text );
1603
1604 # need HTML-safe text
1605 my $linktext = html_escape( $text );
1606
1607 if( defined( $url ) &&
1608 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1609 $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>";
1610 } elsif( 0 && $nocode ){
1611 $res = $linktext;
1612 } else {
1613 $res = "<CODE>$linktext</CODE>";
1614 }
1615 return $res;
54310121 1616}
1617
1618#
2a28b791 1619# html_escape: make text safe for HTML
1620#
1621sub html_escape {
1622 my $rest = $_[0];
1623 $rest =~ s/&/&amp;/g;
1624 $rest =~ s/</&lt;/g;
1625 $rest =~ s/>/&gt;/g;
1626 $rest =~ s/"/&quot;/g;
1627 return $rest;
1628}
1629
1630
1631#
39e571d4 1632# dosify - convert filenames to 8.3
1633#
1634sub dosify {
1635 my($str) = @_;
fe4c6be1 1636 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4 1637 if ($Is83) {
1638 $str = lc $str;
1639 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1640 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1641 }
1642 return $str;
1643}
1644
1645#
2a28b791 1646# page_sect - make an URL from the text of a L<>
54310121 1647#
2a28b791 1648sub page_sect($$) {
1649 my( $page, $section ) = @_;
1650 my( $linktext, $page83, $link); # work strings
1651
1652 # check if we know that this is a section in this page
1653 if (!defined $pages{$page} && defined $sections{$page}) {
1654 $section = $page;
1655 $page = "";
1656 ### print STDERR "reset page='', section=$section\n";
54310121 1657 }
1658
39e571d4 1659 $page83=dosify($page);
1660 $page=$page83 if (defined $pages{$page83});
54310121 1661 if ($page eq "") {
2a28b791 1662 $link = "#" . htmlify( $section );
350ccacd 1663 } elsif ( $page =~ /::/ ) {
350ccacd 1664 $page =~ s,::,/,g;
29f227c9 1665 # Search page cache for an entry keyed under the html page name,
1666 # then look to see what directory that page might be in. NOTE:
1667 # this will only find one page. A better solution might be to produce
1668 # an intermediate page that is an index to all such pages.
1669 my $page_name = $page ;
1670 $page_name =~ s,^.*/,, ;
1671 if ( defined( $pages{ $page_name } ) &&
1672 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1673 ) {
1674 $page = $1 ;
1675 }
1676 else {
1677 # NOTE: This branch assumes that all A::B pages are located in
1678 # $htmlroot/A/B.html . This is often incorrect, since they are
1679 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1680 # analyze the contents of %pages and figure out where any
1681 # cousins of A::B are, then assume that. So, if A::B isn't found,
1682 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1683 # lib/A/B.pm. This is also limited, but it's an improvement.
1684 # Maybe a hints file so that the links point to the correct places
2a28b791 1685 # nonetheless?
1686
29f227c9 1687 }
350ccacd 1688 $link = "$htmlroot/$page.html";
2a28b791 1689 $link .= "#" . htmlify( $section ) if ($section);
54310121 1690 } elsif (!defined $pages{$page}) {
54310121 1691 $link = "";
54310121 1692 } else {
2a28b791 1693 $section = htmlify( $section ) if $section ne "";
1694 ### print STDERR "...section=$section\n";
54310121 1695
1696 # if there is a directory by the name of the page, then assume that an
1697 # appropriate section will exist in the subdirectory
29f227c9 1698# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1699 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 1700 $link = "$htmlroot/$1/$section.html";
2a28b791 1701 ### print STDERR "...link=$link\n";
54310121 1702
1703 # since there is no directory by the name of the page, the section will
1704 # have to exist within a .html of the same name. thus, make sure there
1705 # is a .pod or .pm that might become that .html
1706 } else {
2a28b791 1707 $section = "#$section" if $section;
1708 ### print STDERR "...section=$section\n";
1709
54310121 1710 # check if there is a .pod with the page name
1711 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1712 $link = "$htmlroot/$1.html$section";
1713 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1714 $link = "$htmlroot/$1.html$section";
1715 } else {
54310121 1716 $link = "";
54310121 1717 }
1718 }
1719 }
1720
54310121 1721 if ($link) {
29f227c9 1722 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1723 # implies $htmlroot eq ''. This means that the link in question
1724 # needs a prefix of $htmldir if it begins with '/'. The test for
1725 # the initial '/' is done to avoid '#'-only links, and to allow
1726 # for other kinds of links, like file:, ftp:, etc.
1727 my $url ;
1728 if ( $htmlfileurl ne '' ) {
2a28b791 1729 $link = "$htmldir$link" if $link =~ m{^/};
1730 $url = relativize_url( $link, $htmlfileurl );
1731# print( " b: [$link,$htmlfileurl,$url]\n" );
29f227c9 1732 }
1733 else {
1734 $url = $link ;
1735 }
2a28b791 1736 return $url;
29f227c9 1737
54310121 1738 } else {
2a28b791 1739 return undef();
54310121 1740 }
54310121 1741}
1742
1743#
29f227c9 1744# relativize_url - convert an absolute URL to one relative to a base URL.
1745# Assumes both end in a filename.
1746#
1747sub relativize_url {
1748 my ($dest,$source) = @_ ;
1749
1750 my ($dest_volume,$dest_directory,$dest_file) =
1751 File::Spec::Unix->splitpath( $dest ) ;
1752 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1753
1754 my ($source_volume,$source_directory,$source_file) =
1755 File::Spec::Unix->splitpath( $source ) ;
1756 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1757
1758 my $rel_path = '' ;
1759 if ( $dest ne '' ) {
1760 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1761 }
1762
1763 if ( $rel_path ne '' &&
1764 substr( $rel_path, -1 ) ne '/' &&
1765 substr( $dest_file, 0, 1 ) ne '#'
1766 ) {
1767 $rel_path .= "/$dest_file" ;
1768 }
1769 else {
1770 $rel_path .= "$dest_file" ;
1771 }
1772
1773 return $rel_path ;
1774}
1775
54310121 1776
1777#
2a28b791 1778# coderef - make URL from the text of a C<>
54310121 1779#
2a28b791 1780sub coderef($$){
1781 my( $page, $item ) = @_;
1782 my( $url );
1783
1784 my $fid = fragment_id( $item );
1785
1786 if( defined( $page ) ){
1787 # we have been given a $page...
1788 $page =~ s{::}{/}g;
1789
1790 # Do we take it? Item could be a section!
1791 my $base = $items{$fid};
1792 $base =~ s{[^/]*/}{};
1793 if( $base ne "$page.html" ){
1794 ### print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1795 $page = undef();
1796 }
54310121 1797
2a28b791 1798 } else {
1799 # no page - local items precede cached items
1800 if( exists $local_items{$fid} ){
1801 $page = $local_items{$fid};
1802 } else {
1803 $page = $items{$fid};
1804 }
1805 }
54310121 1806
1807 # if there was a pod file that we found earlier with an appropriate
1808 # =item directive, then create a link to that page.
2a28b791 1809 if( defined $page ){
1810 if( $page ){
1811 if( $pages{$page} =~ /([^:.]*)\.[^:]*:/){
1812 $page = $1 . '.html';
29f227c9 1813 }
2a28b791 1814 my $link = "$htmlroot/$page#item_$fid";
54310121 1815
2a28b791 1816 # Here, we take advantage of the knowledge that $htmlfileurl
1817 # ne '' implies $htmlroot eq ''.
1818 if ( $htmlfileurl ne '' ) {
1819 $link = "$htmldir$link" ;
1820 $url = relativize_url( $link, $htmlfileurl ) ;
1821 } else {
1822 $url = $link ;
1823 }
1824 } else {
1825 $url = "#item_" . $fid;
1826 }
54310121 1827
2a28b791 1828 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1829 }
1830 return( $url, $fid );
54310121 1831}
1832
54310121 1833
1834
1835#
29f227c9 1836# Adapted from Nick Ing-Simmons' PodToHtml package.
1837sub relative_url {
1838 my $source_file = shift ;
1839 my $destination_file = shift;
1840
1841 my $source = URI::file->new_abs($source_file);
1842 my $uo = URI::file->new($destination_file,$source)->abs;
1843 return $uo->rel->as_string;
1844}
1845
1846
1847#
54310121 1848# finish_list - finish off any pending HTML lists. this should be called
1849# after the entire pod file has been read and converted.
1850#
1851sub finish_list {
7b8d334a 1852 while ($listlevel > 0) {
54310121 1853 print HTML "</DL>\n";
1854 $listlevel--;
1855 }
1856}
1857
1858#
1859# htmlify - converts a pod section specification to a suitable section
2a28b791 1860# specification for HTML. Note that we keep spaces and special characters
1861# except ", ? (Netscape problem) and the hyphen (writer's problem...).
54310121 1862#
1863sub htmlify {
2a28b791 1864 my( $heading) = @_;
1865 $heading =~ s/(\s+)/ /g;
1866 $heading =~ s/\s+\Z//;
1867 $heading =~ s/\A\s+//;
1868 # The hyphen is a disgrace to the English language.
1869 $heading =~ s/[-"?]//g;
1870 $heading = lc( $heading );
1871 return $heading;
1872}
54310121 1873
2a28b791 1874#
1875# depod - convert text by eliminating all interior sequences
1876# Note: can be called with copy or modify semantics
1877#
1878my %E2c;
1879$E2c{lt} = '<';
1880$E2c{gt} = '>';
1881$E2c{sol} = '/';
1882$E2c{verbar} = '|';
1883
1884sub depod($){
1885 my $string;
1886 if( ref( $_[0] ) ){
1887 $string = ${$_[0]};
1888 ${$_[0]} = depod1( \$string );
1889 } else {
1890 $string = $_[0];
1891 depod1( \$string );
1892 }
1893}
54310121 1894
2a28b791 1895sub depod1($;$){
1896 my( $rstr, $func ) = @_;
1897 my $res = '';
1898 if( ! defined( $func ) ){
1899 # skip to next begin of an interior sequence
1900 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<// ){
1901 # recurse into its text
1902 $res .= $1 . depod1( $rstr, $2 );
1903 }
1904 $res .= $$rstr;
1905 } elsif( $func eq 'E' ){
1906 # E<x> - convert to character
1907 $$rstr =~ s/^(\w+)>//;
1908 $res .= $E2c{$1};
1909 } elsif( $func eq 'X' ){
1910 # X<> - ignore
1911 $$rstr =~ s/^[^>]*>//;
1912 } elsif( $func eq 'Z' ){
1913 # Z<> - empty
1914 $$rstr =~ s/^>//;
1915 } else {
1916 # all others: either recurse into new function or
1917 # terminate at closing angle bracket
1918 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)// ){
1919 $res .= $1;
1920 last if $2 eq '>';
1921 $res .= depod1( $rstr, substr($2,0,1) );
1922 }
1923 ## If we're here and $2 ne '>': undelimited interior sequence.
1924 ## Ignored, as this is called without proper indication of where we are.
1925 ## Rely on process_text to produce diagnostics.
1926 }
1927 return $res;
1928}
54310121 1929
2a28b791 1930#
1931# fragment_id - construct a fragment identifier from:
1932# a) =item text
1933# b) contents of C<...>
1934#
1935my @hc;
1936sub fragment_id {
1937 my $text = shift();
1938 $text =~ s/\s+\Z//s;
1939 if( $text ){
1940 # a method or function?
1941 return $1 if $text =~ /(\w+)\s*\(/;
1942 return $1 if $text =~ /->\s*(\w+)\s*\(?/;
1943
1944 # a variable name?
1945 return $1 if $text =~ /^([$@%*]\S+)/;
1946
1947 # some pattern matching operator?
1948 return $1 if $text =~ m|^(\w+/).*/\w*$|;
1949
1950 # fancy stuff... like "do { }"
1951 return $1 if $text =~ m|^(\w+)\s*{.*}$|;
1952
1953 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
1954 # and some funnies with ... Module ...
1955 return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
1956 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
1957
1958 # text? normalize!
1959 $text =~ s/\s+/_/sg;
1960 $text =~ s{(\W)}{
1961 defined( $hc[ord($1)] ) ? $hc[ord($1)]
1962 : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
1963 $text = substr( $text, 0, 50 );
1964 } else {
1965 return undef();
1966 }
54310121 1967}
1968
2a28b791 1969#
1970# make_URL_href - generate HTML href from URL
1971# Special treatment for CGI queries.
1972#
1973sub make_URL_href($){
1974 my( $url ) = @_;
1975 if( $url !~
1976 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)?$}{<A HREF="$1$2">$1</A>}i ){
1977 $url = "<A HREF=\"$url\">$url</A>";
1978 }
1979 return $url;
54310121 1980}
1981
19821;