more fixes for warnings from change#4840
[p5sagit/p5-mst-13.2.git] / lib / Pod / Html.pm
CommitLineData
54310121 1package Pod::Html;
2
3use Pod::Functions;
4use Getopt::Long; # package for handling command-line parameters
29f227c9 5use File::Spec::Unix;
54310121 6require Exporter;
7b8d334a 7use vars qw($VERSION);
2a28b791 8$VERSION = 1.03;
54310121 9@ISA = Exporter;
10@EXPORT = qw(pod2html htmlify);
11use Cwd;
12
13use Carp;
14
3ec07288 15use locale; # make \w work right in non-ASCII lands
16
54310121 17use strict;
18
7b8d334a 19use Config;
20
54310121 21=head1 NAME
22
7b8d334a 23Pod::Html - module to convert pod files to HTML
54310121 24
25=head1 SYNOPSIS
26
7b8d334a 27 use Pod::Html;
54310121 28 pod2html([options]);
29
30=head1 DESCRIPTION
31
32Converts files from pod format (see L<perlpod>) to HTML format. It
33can automatically generate indexes and cross-references, and it keeps
34a cache of things it knows how to cross-reference.
35
36=head1 ARGUMENTS
37
38Pod::Html takes the following arguments:
39
40=over 4
41
42=item help
43
44 --help
45
46Displays the usage message.
47
5a039dd3 48=item htmldir
49
50 --htmldir=name
51
52Sets the directory in which the resulting HTML file is placed. This
29f227c9 53is used to generate relative links to other files. Not passing this
54causes all links to be absolute, since this is the value that tells
55Pod::Html the root of the documentation tree.
5a039dd3 56
54310121 57=item htmlroot
58
59 --htmlroot=name
60
61Sets the base URL for the HTML files. When cross-references are made,
62the HTML root is prepended to the URL.
63
64=item infile
65
66 --infile=name
67
68Specify the pod file to convert. Input is taken from STDIN if no
69infile is specified.
70
71=item outfile
72
73 --outfile=name
74
75Specify the HTML file to create. Output goes to STDOUT if no outfile
76is specified.
77
78=item podroot
79
80 --podroot=name
81
82Specify the base directory for finding library pods.
83
84=item podpath
85
86 --podpath=name:...:name
87
88Specify which subdirectories of the podroot contain pod files whose
89HTML converted forms can be linked-to in cross-references.
90
91=item libpods
92
93 --libpods=name:...:name
94
95List of page names (eg, "perlfunc") which contain linkable C<=item>s.
96
97=item netscape
98
99 --netscape
100
101Use Netscape HTML directives when applicable.
102
103=item nonetscape
104
105 --nonetscape
106
107Do not use Netscape HTML directives (default).
108
109=item index
110
111 --index
112
113Generate an index at the top of the HTML file (default behaviour).
114
115=item noindex
116
117 --noindex
118
119Do not generate an index at the top of the HTML file.
120
121
122=item recurse
123
124 --recurse
125
126Recurse into subdirectories specified in podpath (default behaviour).
127
128=item norecurse
129
130 --norecurse
131
132Do not recurse into subdirectories specified in podpath.
133
134=item title
135
136 --title=title
137
138Specify the title of the resulting HTML file.
139
34db337b 140=item css
141
142 --css=stylesheet
143
144Specify the URL of a cascading style sheet.
145
54310121 146=item verbose
147
148 --verbose
149
150Display progress messages.
151
34db337b 152=item quiet
153
154 --quiet
155
156Don't display I<mostly harmless> warning messages.
157
54310121 158=back
159
160=head1 EXAMPLE
161
162 pod2html("pod2html",
163 "--podpath=lib:ext:pod:vms",
164 "--podroot=/usr/src/perl",
165 "--htmlroot=/perl/nmanual",
166 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
167 "--recurse",
168 "--infile=foo.pod",
169 "--outfile=/perl/nmanual/foo.html");
170
34db337b 171=head1 ENVIRONMENT
172
173Uses $Config{pod2html} to setup default options.
174
54310121 175=head1 AUTHOR
176
177Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
178
54310121 179=head1 SEE ALSO
180
181L<perlpod>
182
183=head1 COPYRIGHT
184
185This program is distributed under the Artistic License.
186
187=cut
188
1f763251 189my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
190my $dircache = "pod2htmd$cache_ext";
191my $itemcache = "pod2htmi$cache_ext";
54310121 192
193my @begin_stack = (); # begin/end stack
194
29f227c9 195my @libpods = (); # files to search for links from C<> directives
196my $htmlroot = "/"; # http-server base directory from which all
54310121 197 # relative paths in $podpath stem.
5a039dd3 198my $htmldir = ""; # The directory to which the html pages
199 # will (eventually) be written.
54310121 200my $htmlfile = ""; # write to stdout by default
29f227c9 201my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3 202 # refer to this file. This is only used
203 # to make relative urls that point to
204 # other files.
54310121 205my $podfile = ""; # read from stdin by default
206my @podpath = (); # list of directories containing library pods.
207my $podroot = "."; # filesystem base directory from which all
208 # relative paths in $podpath stem.
34db337b 209my $css = ''; # Cascading style sheet
54310121 210my $recurse = 1; # recurse on subdirectories in $podpath.
34db337b 211my $quiet = 0; # not quiet by default
54310121 212my $verbose = 0; # not verbose by default
213my $doindex = 1; # non-zero if we should generate an index
214my $listlevel = 0; # current list depth
2a28b791 215my @listend = (); # the text to use to end the list.
216my $after_lpar = 0; # set to true after a par in an =item
54310121 217my $ignore = 1; # whether or not to format text. we don't
218 # format text until we hit our first pod
219 # directive.
220
221my %items_named = (); # for the multiples of the same item in perlfunc
222my @items_seen = ();
223my $netscape = 0; # whether or not to use netscape directives.
224my $title; # title to give the pod(s)
34db337b 225my $header = 0; # produce block header/footer
54310121 226my $top = 1; # true if we are at the top of the doc. used
227 # to prevent the first <HR> directive.
228my $paragraph; # which paragraph we're processing (used
229 # for error messages)
2a28b791 230my $ptQuote = 0; # status of double-quote conversion
54310121 231my %pages = (); # associative array used to find the location
232 # of pages referenced by L<> links.
233my %sections = (); # sections within this page
234my %items = (); # associative array used to find the location
235 # of =item directives referenced by C<> links
2a28b791 236my %local_items = (); # local items - avoid destruction of %items
39e571d4 237my $Is83; # is dos with short filenames (8.3)
238
54310121 239sub init_globals {
04f720db 240$dircache = "pod2htmd$cache_ext";
241$itemcache = "pod2htmi$cache_ext";
54310121 242
243@begin_stack = (); # begin/end stack
244
245@libpods = (); # files to search for links from C<> directives
246$htmlroot = "/"; # http-server base directory from which all
247 # relative paths in $podpath stem.
248$htmlfile = ""; # write to stdout by default
249$podfile = ""; # read from stdin by default
250@podpath = (); # list of directories containing library pods.
251$podroot = "."; # filesystem base directory from which all
252 # relative paths in $podpath stem.
34db337b 253$css = ''; # Cascading style sheet
54310121 254$recurse = 1; # recurse on subdirectories in $podpath.
34db337b 255$quiet = 0; # not quiet by default
54310121 256$verbose = 0; # not verbose by default
257$doindex = 1; # non-zero if we should generate an index
258$listlevel = 0; # current list depth
2a28b791 259@listend = (); # the text to use to end the list.
260$after_lpar = 0; # set to true after a par in an =item
54310121 261$ignore = 1; # whether or not to format text. we don't
262 # format text until we hit our first pod
263 # directive.
264
265@items_seen = ();
266%items_named = ();
267$netscape = 0; # whether or not to use netscape directives.
34db337b 268$header = 0; # produce block header/footer
54310121 269$title = ''; # title to give the pod(s)
270$top = 1; # true if we are at the top of the doc. used
271 # to prevent the first <HR> directive.
272$paragraph = ''; # which paragraph we're processing (used
273 # for error messages)
54310121 274%sections = (); # sections within this page
3e3baf6d 275
276# These are not reinitialised here but are kept as a cache.
277# See get_cache and related cache management code.
278#%pages = (); # associative array used to find the location
279 # of pages referenced by L<> links.
280#%items = (); # associative array used to find the location
54310121 281 # of =item directives referenced by C<> links
2a28b791 282%local_items = ();
39e571d4 283$Is83=$^O eq 'dos';
54310121 284}
285
2a28b791 286#
287# clean_data: global clean-up of pod data
288#
289sub clean_data($){
290 my( $dataref ) = @_;
291 my $i;
292 for( $i = 0; $i <= $#$dataref; $i++ ){
293 ${$dataref}[$i] =~ s/\s+\Z//;
294
295 # have a look for all-space lines
296 if( ${$dataref}[$i] =~ /^\s+$/m ){
297 my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
298 splice( @$dataref, $i, 1, @chunks );
299 }
300 }
301}
302
303
54310121 304sub pod2html {
305 local(@ARGV) = @_;
306 local($/);
307 local $_;
308
309 init_globals();
310
39e571d4 311 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
312
54310121 313 # cache of %pages and %items from last time we ran pod2html
54310121 314
315 #undef $opt_help if defined $opt_help;
316
317 # parse the command-line parameters
318 parse_command_line();
319
320 # set some variables to their default values if necessary
321 local *POD;
322 unless (@ARGV && $ARGV[0]) {
323 $podfile = "-" unless $podfile; # stdin
324 open(POD, "<$podfile")
325 || die "$0: cannot open $podfile file for input: $!\n";
326 } else {
327 $podfile = $ARGV[0]; # XXX: might be more filenames
328 *POD = *ARGV;
329 }
330 $htmlfile = "-" unless $htmlfile; # stdout
331 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
29f227c9 332 $htmldir =~ s#/$## ; # so we don't get a //
333 if ( $htmlroot eq ''
334 && defined( $htmldir )
335 && $htmldir ne ''
336 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
337 )
5a039dd3 338 {
29f227c9 339 # Set the 'base' url for this file, so that we can use it
340 # as the location from which to calculate relative links
341 # to other files. If this is '', then absolute links will
342 # be used throughout.
343 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 344 }
54310121 345
346 # read the pod a paragraph at a time
347 warn "Scanning for sections in input file(s)\n" if $verbose;
348 $/ = "";
349 my @poddata = <POD>;
350 close(POD);
2a28b791 351 clean_data( \@poddata );
54310121 352
353 # scan the pod for =head[1-6] directives and build an index
354 my $index = scan_headings(\%sections, @poddata);
355
3e3baf6d 356 unless($index) {
31e56455 357 warn "No headings in $podfile\n" if $verbose;
3e3baf6d 358 }
359
54310121 360 # open the output file
361 open(HTML, ">$htmlfile")
362 || die "$0: cannot open $htmlfile file for output: $!\n";
363
d011ffae 364 # put a title in the HTML file if one wasn't specified
365 if ($title eq '') {
366 TITLE_SEARCH: {
367 for (my $i = 0; $i < @poddata; $i++) {
368 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
369 for my $para ( @poddata[$i, $i+1] ) {
370 last TITLE_SEARCH
371 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
372 }
373 }
54310121 374
d011ffae 375 }
376 }
377 }
3e3baf6d 378 if (!$title and $podfile =~ /\.pod$/) {
379 # probably a split pod so take first =head[12] as title
380 for (my $i = 0; $i < @poddata; $i++) {
381 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
382 }
383 warn "adopted '$title' as title for $podfile\n"
384 if $verbose and $title;
385 }
7b8d334a 386 if ($title) {
387 $title =~ s/\s*\(.*\)//;
388 } else {
34db337b 389 warn "$0: no title for $podfile" unless $quiet;
54310121 390 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
391 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 392 warn "using $title" if $verbose;
54310121 393 }
34db337b 394 my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
395 $csslink =~ s,\\,/,g;
396 $csslink =~ s,(/.):,$1|,;
397
398 my $block = $header ? <<END_OF_BLOCK : '';
399<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
400<TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
401<FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
402</TD></TR>
403</TABLE>
404END_OF_BLOCK
405
54310121 406 print HTML <<END_OF_HEAD;
7b8d334a 407<HTML>
408<HEAD>
34db337b 409<TITLE>$title</TITLE>$csslink
7b8d334a 410<LINK REV="made" HREF="mailto:$Config{perladmin}">
411</HEAD>
54310121 412
7b8d334a 413<BODY>
34db337b 414$block
54310121 415END_OF_HEAD
416
3e3baf6d 417 # load/reload/validate/cache %pages and %items
418 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121 419
420 # scan the pod for =item directives
2a28b791 421 scan_items( \%local_items, "", @poddata);
54310121 422
423 # put an index at the top of the file. note, if $doindex is 0 we
424 # still generate an index, but surround it with an html comment.
425 # that way some other program can extract it if desired.
426 $index =~ s/--+/-/g;
2a28b791 427 print HTML "<A NAME=\"__index__\"></A>\n";
54310121 428 print HTML "<!-- INDEX BEGIN -->\n";
429 print HTML "<!--\n" unless $doindex;
430 print HTML $index;
431 print HTML "-->\n" unless $doindex;
432 print HTML "<!-- INDEX END -->\n\n";
31e56455 433 print HTML "<HR>\n" if $doindex and $index;
54310121 434
435 # now convert this file
2a28b791 436 my $after_item; # set to true after an =item
437 warn "Converting input file $podfile\n" if $verbose;
438 foreach my $i (0..$#poddata){
439 $ptQuote = 0; # status of quote conversion
440
54310121 441 $_ = $poddata[$i];
442 $paragraph = $i+1;
443 if (/^(=.*)/s) { # is it a pod directive?
444 $ignore = 0;
2a28b791 445 $after_item = 0;
54310121 446 $_ = $1;
447 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
448 process_begin($1, $2);
449 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
450 process_end($1, $2);
451 } elsif (/^=cut/) { # =cut
452 process_cut();
453 } elsif (/^=pod/) { # =pod
454 process_pod();
455 } else {
456 next if @begin_stack && $begin_stack[-1] ne 'html';
457
7b8d334a 458 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
2a28b791 459 process_head( $1, $2, $doindex && $index );
460 } elsif (/^=item\s*(.*\S)?/sm) { # =item text
461 warn "$0: $podfile: =item without bullet, number or text"
228a48a5 462 . " in paragraph $paragraph.\n" if !defined($1) or $1 eq '';
2a28b791 463 process_item( $1 );
464 $after_item = 1;
54310121 465 } elsif (/^=over\s*(.*)/) { # =over N
466 process_over();
467 } elsif (/^=back/) { # =back
468 process_back();
469 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
470 process_for($1,$2);
471 } else {
472 /^=(\S*)\s*/;
473 warn "$0: $podfile: unknown pod directive '$1' in "
474 . "paragraph $paragraph. ignoring.\n";
475 }
476 }
477 $top = 0;
478 }
479 else {
480 next if $ignore;
481 next if @begin_stack && $begin_stack[-1] ne 'html';
482 my $text = $_;
2a28b791 483 if( $text =~ /\A\s+/ ){
484 process_pre( \$text );
485 print HTML "<PRE>\n$text</PRE>\n";
486
487 } else {
488 process_text( \$text );
489
490 # experimental: check for a paragraph where all lines
491 # have some ...\t...\t...\n pattern
492 if( $text =~ /\t/ ){
493 my @lines = split( "\n", $text );
494 if( @lines > 1 ){
495 my $all = 2;
496 foreach my $line ( @lines ){
497 if( $line =~ /\S/ && $line !~ /\t/ ){
498 $all--;
499 last if $all == 0;
500 }
501 }
502 if( $all > 0 ){
503 $text =~ s/\t+/<TD>/g;
504 $text =~ s/^/<TR><TD>/gm;
505 $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
506 $text . '</TABLE>';
507 }
508 }
509 }
510 ## end of experimental
511
512 if( $after_item ){
513 print HTML "$text\n";
514 $after_lpar = 1;
515 } else {
516 print HTML "<P>$text</P>\n";
517 }
518 }
519 $after_item = 0;
54310121 520 }
521 }
522
523 # finish off any pending directives
524 finish_list();
2a28b791 525
526 # link to page index
527 print HTML "<P><A HREF=\"#__index__\"><SMALL>page index</SMALL></A></P>\n"
528 if $doindex and $index;
529
54310121 530 print HTML <<END_OF_TAIL;
34db337b 531$block
7b8d334a 532</BODY>
54310121 533
7b8d334a 534</HTML>
54310121 535END_OF_TAIL
536
537 # close the html file
538 close(HTML);
539
540 warn "Finished\n" if $verbose;
541}
542
543##############################################################################
544
545my $usage; # see below
546sub usage {
547 my $podfile = shift;
548 warn "$0: $podfile: @_\n" if @_;
549 die $usage;
550}
551
552$usage =<<END_OF_USAGE;
553Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
554 --podpath=<name>:...:<name> --podroot=<name>
555 --libpods=<name>:...:<name> --recurse --verbose --index
556 --netscape --norecurse --noindex
557
558 --flush - flushes the item and directory caches.
559 --help - prints this message.
560 --htmlroot - http-server base directory from which all relative paths
561 in podpath stem (default is /).
562 --index - generate an index at the top of the resulting html
563 (default).
564 --infile - filename for the pod to convert (input taken from stdin
565 by default).
566 --libpods - colon-separated list of pages to search for =item pod
567 directives in as targets of C<> and implicit links (empty
568 by default). note, these are not filenames, but rather
569 page names like those that appear in L<> links.
570 --netscape - will use netscape html directives when applicable.
571 --nonetscape - will not use netscape directives (default).
572 --outfile - filename for the resulting html file (output sent to
573 stdout by default).
574 --podpath - colon-separated list of directories containing library
575 pods. empty by default.
576 --podroot - filesystem base directory from which all relative paths
577 in podpath stem (default is .).
578 --noindex - don't generate an index at the top of the resulting html.
579 --norecurse - don't recurse on those subdirectories listed in podpath.
580 --recurse - recurse on those subdirectories listed in podpath
581 (default behavior).
582 --title - title that will appear in resulting html file.
34db337b 583 --header - produce block header/footer
584 --css - stylesheet URL
54310121 585 --verbose - self-explanatory
34db337b 586 --quiet - supress some benign warning messages
54310121 587
588END_OF_USAGE
589
590sub parse_command_line {
2a28b791 591 my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
34db337b 592 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 593 my $result = GetOptions(
29f227c9 594 'flush' => \$opt_flush,
595 'help' => \$opt_help,
596 'htmldir=s' => \$opt_htmldir,
54310121 597 'htmlroot=s' => \$opt_htmlroot,
29f227c9 598 'index!' => \$opt_index,
54310121 599 'infile=s' => \$opt_infile,
600 'libpods=s' => \$opt_libpods,
601 'netscape!' => \$opt_netscape,
602 'outfile=s' => \$opt_outfile,
603 'podpath=s' => \$opt_podpath,
604 'podroot=s' => \$opt_podroot,
54310121 605 'recurse!' => \$opt_recurse,
606 'title=s' => \$opt_title,
34db337b 607 'header' => \$opt_header,
608 'css=s' => \$opt_css,
54310121 609 'verbose' => \$opt_verbose,
34db337b 610 'quiet' => \$opt_quiet,
54310121 611 );
612 usage("-", "invalid parameters") if not $result;
613
614 usage("-") if defined $opt_help; # see if the user asked for help
615 $opt_help = ""; # just to make -w shut-up.
616
617 $podfile = $opt_infile if defined $opt_infile;
618 $htmlfile = $opt_outfile if defined $opt_outfile;
5a039dd3 619 $htmldir = $opt_htmldir if defined $opt_outfile;
54310121 620
621 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
622 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
623
624 warn "Flushing item and directory caches\n"
625 if $opt_verbose && defined $opt_flush;
626 unlink($dircache, $itemcache) if defined $opt_flush;
627
628 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
629 $podroot = $opt_podroot if defined $opt_podroot;
630
631 $doindex = $opt_index if defined $opt_index;
632 $recurse = $opt_recurse if defined $opt_recurse;
633 $title = $opt_title if defined $opt_title;
34db337b 634 $header = defined $opt_header ? 1 : 0;
635 $css = $opt_css if defined $opt_css;
54310121 636 $verbose = defined $opt_verbose ? 1 : 0;
34db337b 637 $quiet = defined $opt_quiet ? 1 : 0;
54310121 638 $netscape = $opt_netscape if defined $opt_netscape;
639}
640
3e3baf6d 641
642my $saved_cache_key;
643
644sub get_cache {
645 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
646 my @cache_key_args = @_;
647
648 # A first-level cache:
649 # Don't bother reading the cache files if they still apply
650 # and haven't changed since we last read them.
651
652 my $this_cache_key = cache_key(@cache_key_args);
653
654 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
655
656 # load the cache of %pages and %items if possible. $tests will be
657 # non-zero if successful.
658 my $tests = 0;
659 if (-f $dircache && -f $itemcache) {
660 warn "scanning for item cache\n" if $verbose;
661 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
662 }
663
664 # if we didn't succeed in loading the cache then we must (re)build
665 # %pages and %items.
666 if (!$tests) {
667 warn "scanning directories in pod-path\n" if $verbose;
668 scan_podpath($podroot, $recurse, 0);
669 }
670 $saved_cache_key = cache_key(@cache_key_args);
671}
672
673sub cache_key {
674 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
675 return join('!', $dircache, $itemcache, $recurse,
29f227c9 676 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d 677}
678
54310121 679#
3e3baf6d 680# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121 681# are valid caches of %pages and %items. if they are valid then it loads
682# them and returns a non-zero value.
683#
3e3baf6d 684sub load_cache {
54310121 685 my($dircache, $itemcache, $podpath, $podroot) = @_;
686 my($tests);
687 local $_;
688
689 $tests = 0;
690
691 open(CACHE, "<$itemcache") ||
692 die "$0: error opening $itemcache for reading: $!\n";
693 $/ = "\n";
694
695 # is it the same podpath?
696 $_ = <CACHE>;
697 chomp($_);
3e3baf6d 698 $tests++ if (join(":", @$podpath) eq $_);
54310121 699
700 # is it the same podroot?
701 $_ = <CACHE>;
702 chomp($_);
703 $tests++ if ($podroot eq $_);
704
705 # load the cache if its good
706 if ($tests != 2) {
707 close(CACHE);
54310121 708 return 0;
709 }
710
711 warn "loading item cache\n" if $verbose;
712 while (<CACHE>) {
713 /(.*?) (.*)$/;
714 $items{$1} = $2;
715 }
716 close(CACHE);
717
718 warn "scanning for directory cache\n" if $verbose;
719 open(CACHE, "<$dircache") ||
720 die "$0: error opening $dircache for reading: $!\n";
721 $/ = "\n";
722 $tests = 0;
723
724 # is it the same podpath?
725 $_ = <CACHE>;
726 chomp($_);
3e3baf6d 727 $tests++ if (join(":", @$podpath) eq $_);
54310121 728
729 # is it the same podroot?
730 $_ = <CACHE>;
731 chomp($_);
732 $tests++ if ($podroot eq $_);
733
734 # load the cache if its good
735 if ($tests != 2) {
736 close(CACHE);
54310121 737 return 0;
738 }
739
740 warn "loading directory cache\n" if $verbose;
741 while (<CACHE>) {
742 /(.*?) (.*)$/;
743 $pages{$1} = $2;
744 }
745
746 close(CACHE);
747
748 return 1;
749}
750
751#
752# scan_podpath - scans the directories specified in @podpath for directories,
753# .pod files, and .pm files. it also scans the pod files specified in
754# @libpods for =item directives.
755#
756sub scan_podpath {
3e3baf6d 757 my($podroot, $recurse, $append) = @_;
54310121 758 my($pwd, $dir);
759 my($libpod, $dirname, $pod, @files, @poddata);
760
3e3baf6d 761 unless($append) {
762 %items = ();
763 %pages = ();
764 }
765
54310121 766 # scan each directory listed in @podpath
767 $pwd = getcwd();
768 chdir($podroot)
769 || die "$0: error changing to directory $podroot: $!\n";
770 foreach $dir (@podpath) {
771 scan_dir($dir, $recurse);
772 }
773
774 # scan the pods listed in @libpods for =item directives
775 foreach $libpod (@libpods) {
776 # if the page isn't defined then we won't know where to find it
777 # on the system.
778 next unless defined $pages{$libpod} && $pages{$libpod};
779
780 # if there is a directory then use the .pod and .pm files within it.
29f227c9 781 # NOTE: Only finds the first so-named directory in the tree.
782# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
783 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 784 # find all the .pod and .pm files within the directory
785 $dirname = $1;
786 opendir(DIR, $dirname) ||
787 die "$0: error opening directory $dirname: $!\n";
788 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
789 closedir(DIR);
790
791 # scan each .pod and .pm file for =item directives
792 foreach $pod (@files) {
793 open(POD, "<$dirname/$pod") ||
794 die "$0: error opening $dirname/$pod for input: $!\n";
795 @poddata = <POD>;
796 close(POD);
2a28b791 797 clean_data( \@poddata );
54310121 798
2a28b791 799 scan_items( \%items, "$dirname/$pod", @poddata);
54310121 800 }
801
802 # use the names of files as =item directives too.
2a28b791 803### Don't think this should be done this way - confuses issues.(WL)
804### foreach $pod (@files) {
805### $pod =~ /^(.*)(\.pod|\.pm)$/;
806### $items{$1} = "$dirname/$1.html" if $1;
807### }
54310121 808 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
809 $pages{$libpod} =~ /([^:]*\.pm):/) {
810 # scan the .pod or .pm file for =item directives
811 $pod = $1;
812 open(POD, "<$pod") ||
813 die "$0: error opening $pod for input: $!\n";
814 @poddata = <POD>;
815 close(POD);
2a28b791 816 clean_data( \@poddata );
54310121 817
2a28b791 818 scan_items( \%items, "$pod", @poddata);
54310121 819 } else {
820 warn "$0: shouldn't be here (line ".__LINE__."\n";
821 }
822 }
823 @poddata = (); # clean-up a bit
824
825 chdir($pwd)
826 || die "$0: error changing to directory $pwd: $!\n";
827
828 # cache the item list for later use
829 warn "caching items for later use\n" if $verbose;
830 open(CACHE, ">$itemcache") ||
831 die "$0: error open $itemcache for writing: $!\n";
832
833 print CACHE join(":", @podpath) . "\n$podroot\n";
834 foreach my $key (keys %items) {
835 print CACHE "$key $items{$key}\n";
836 }
837
838 close(CACHE);
839
840 # cache the directory list for later use
841 warn "caching directories for later use\n" if $verbose;
842 open(CACHE, ">$dircache") ||
843 die "$0: error open $dircache for writing: $!\n";
844
845 print CACHE join(":", @podpath) . "\n$podroot\n";
846 foreach my $key (keys %pages) {
847 print CACHE "$key $pages{$key}\n";
848 }
849
850 close(CACHE);
851}
852
853#
854# scan_dir - scans the directory specified in $dir for subdirectories, .pod
855# files, and .pm files. notes those that it finds. this information will
856# be used later in order to figure out where the pages specified in L<>
857# links are on the filesystem.
858#
859sub scan_dir {
860 my($dir, $recurse) = @_;
861 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
862 local $_;
863
864 @subdirs = ();
865 @pods = ();
866
867 opendir(DIR, $dir) ||
868 die "$0: error opening directory $dir: $!\n";
869 while (defined($_ = readdir(DIR))) {
870 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
871 $pages{$_} = "" unless defined $pages{$_};
872 $pages{$_} .= "$dir/$_:";
873 push(@subdirs, $_);
874 } elsif (/\.pod$/) { # .pod
875 s/\.pod$//;
876 $pages{$_} = "" unless defined $pages{$_};
877 $pages{$_} .= "$dir/$_.pod:";
878 push(@pods, "$dir/$_.pod");
879 } elsif (/\.pm$/) { # .pm
880 s/\.pm$//;
881 $pages{$_} = "" unless defined $pages{$_};
882 $pages{$_} .= "$dir/$_.pm:";
883 push(@pods, "$dir/$_.pm");
884 }
885 }
886 closedir(DIR);
887
888 # recurse on the subdirectories if necessary
889 if ($recurse) {
890 foreach my $subdir (@subdirs) {
891 scan_dir("$dir/$subdir", $recurse);
892 }
893 }
894}
895
896#
897# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
898# build an index.
899#
900sub scan_headings {
901 my($sections, @data) = @_;
2a28b791 902 my($tag, $which_head, $otitle, $listdepth, $index);
54310121 903
be173d55 904 # here we need local $ignore = 0;
905 # unfortunately, we can't have it, because $ignore is lexical
906 $ignore = 0;
907
54310121 908 $listdepth = 0;
909 $index = "";
910
911 # scan for =head directives, note their name, and build an index
912 # pointing to each of them.
913 foreach my $line (@data) {
bb9460ed 914 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
2a28b791 915 ($tag, $which_head, $otitle) = ($1,$2,$3);
916
917 my $title = depod( $otitle );
918 my $name = htmlify( $title );
919 $$sections{$name} = 1;
920 $title = process_text( \$otitle );
54310121 921
102c538a 922 while ($which_head != $listdepth) {
923 if ($which_head > $listdepth) {
924 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
925 $listdepth++;
926 } elsif ($which_head < $listdepth) {
927 $listdepth--;
928 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
929 }
54310121 930 }
54310121 931
932 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
2a28b791 933 "<A HREF=\"#" . $name . "\">" .
934 $title . "</A></LI>";
54310121 935 }
936 }
937
938 # finish off the lists
939 while ($listdepth--) {
940 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
941 }
942
943 # get rid of bogus lists
944 $index =~ s,\t*<UL>\s*</UL>\n,,g;
945
bb9460ed 946 $ignore = 1; # restore old value;
be173d55 947
54310121 948 return $index;
949}
950
951#
952# scan_items - scans the pod specified by $pod for =item directives. we
953# will use this information later on in resolving C<> links.
954#
955sub scan_items {
2a28b791 956 my( $itemref, $pod, @poddata ) = @_;
54310121 957 my($i, $item);
958 local $_;
959
960 $pod =~ s/\.pod$//;
961 $pod .= ".html" if $pod;
962
963 foreach $i (0..$#poddata) {
2a28b791 964 my $txt = depod( $poddata[$i] );
965
966 # figure out what kind of item it is.
967 # Build string for referencing this item.
968 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
969 next unless $1;
970 $item = $1;
971 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
972 $item = $1;
973 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
974 $item = $1;
975 } else {
976 next;
54310121 977 }
2a28b791 978 my $fid = fragment_id( $item );
979 $$itemref{$fid} = "$pod" if $fid;
54310121 980 }
981}
982
983#
984# process_head - convert a pod head[1-6] tag and convert it to HTML format.
985#
986sub process_head {
2a28b791 987 my($tag, $heading, $hasindex) = @_;
54310121 988
989 # figure out the level of the =head
990 $tag =~ /head([1-6])/;
991 my $level = $1;
992
2a28b791 993 if( $listlevel ){
994 warn "$0: $podfile: unterminated list at =head in paragraph $paragraph. ignoring.\n";
995 while( $listlevel ){
996 process_back();
997 }
998 }
999
1000 print HTML "<P>\n";
1001 if( $level == 1 && ! $top ){
1002 print HTML "<A HREF=\"#__index__\"><SMALL>page index</SMALL></A>\n"
1003 if $hasindex;
1004 print HTML "<HR>\n"
1005 }
1006
1007 my $name = htmlify( depod( $heading ) );
1008 my $convert = process_text( \$heading );
1009 print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
54310121 1010}
1011
2a28b791 1012
54310121 1013#
2a28b791 1014# emit_item_tag - print an =item's text
1015# Note: The global $EmittedItem is used for inhibiting self-references.
54310121 1016#
2a28b791 1017my $EmittedItem;
1018
1019sub emit_item_tag($$$){
1020 my( $otext, $text, $compact ) = @_;
1021 my $item = fragment_id( $text );
54310121 1022
2a28b791 1023 $EmittedItem = $item;
1024 ### print STDERR "emit_item_tag=$item ($text)\n";
54310121 1025
2a28b791 1026 print HTML '<STRONG>';
1027 if ($items_named{$item}++) {
1028 print HTML process_text( \$otext );
1029 } else {
1030 my $name = 'item_' . $item;
1031 print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';
1032 }
1033 print HTML "</STRONG><BR>\n";
1034 undef( $EmittedItem );
1035}
1036
1037sub emit_li {
1038 my( $tag ) = @_;
1039 if( $items_seen[$listlevel]++ == 0 ){
1040 push( @listend, "</$tag>" );
1041 print HTML "<$tag>\n";
1042 }
1043 print HTML $tag eq 'DL' ? '<DT>' : '<LI>';
1044}
1045
1046#
1047# process_item - convert a pod item tag and convert it to HTML format.
1048#
1049sub process_item {
1050 my( $otext ) = @_;
54310121 1051
1052 # lots of documents start a list without doing an =over. this is
1053 # bad! but, the proper thing to do seems to be to just assume
1054 # they did do an =over. so warn them once and then continue.
2a28b791 1055 if( $listlevel == 0 ){
1056 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n";
1057 process_over();
1058 }
54310121 1059
2a28b791 1060 # formatting: insert a paragraph if preceding item has >1 paragraph
1061 if( $after_lpar ){
1062 print HTML "<P></P>\n";
1063 $after_lpar = 0;
1064 }
54310121 1065
1066 # remove formatting instructions from the text
2a28b791 1067 my $text = depod( $otext );
1068
1069 # all the list variants:
1070 if( $text =~ /\A\*/ ){ # bullet
1071 emit_li( 'UL' );
1072 if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1073 my $tag = $1;
1074 $otext =~ s/\A\*\s+//;
1075 emit_item_tag( $otext, $tag, 1 );
54310121 1076 }
1077
2a28b791 1078 } elsif( $text =~ /\A\d+/ ){ # numbered list
1079 emit_li( 'OL' );
1080 if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1081 my $tag = $1;
1082 $otext =~ s/\A\d+\.?\s*//;
1083 emit_item_tag( $otext, $tag, 1 );
7b8d334a 1084 }
54310121 1085
2a28b791 1086 } else { # definition list
1087 emit_li( 'DL' );
1088 if ($text =~ /\A(.+)\Z/s ){ # should have text
1089 emit_item_tag( $otext, $text, 1 );
7b8d334a 1090 }
54310121 1091 print HTML '<DD>';
1092 }
54310121 1093 print HTML "\n";
1094}
1095
1096#
2a28b791 1097# process_over - process a pod over tag and start a corresponding HTML list.
54310121 1098#
1099sub process_over {
1100 # start a new list
1101 $listlevel++;
2a28b791 1102 push( @items_seen, 0 );
1103 $after_lpar = 0;
54310121 1104}
1105
1106#
1107# process_back - process a pod back tag and convert it to HTML format.
1108#
1109sub process_back {
2a28b791 1110 if( $listlevel == 0 ){
1111 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n";
1112 return;
1113 }
54310121 1114
1115 # close off the list. note, I check to see if $listend[$listlevel] is
1116 # defined because an =item directive may have never appeared and thus
1117 # $listend[$listlevel] may have never been initialized.
1118 $listlevel--;
2a28b791 1119 if( defined $listend[$listlevel] ){
1120 print HTML '<P></P>' if $after_lpar;
1121 print HTML $listend[$listlevel];
1122 print HTML "\n";
1123 pop( @listend );
1124 }
1125 $after_lpar = 0;
54310121 1126
2a28b791 1127 # clean up item count
1128 pop( @items_seen );
54310121 1129}
1130
1131#
2a28b791 1132# process_cut - process a pod cut tag, thus start ignoring pod directives.
54310121 1133#
1134sub process_cut {
1135 $ignore = 1;
1136}
1137
1138#
2a28b791 1139# process_pod - process a pod pod tag, thus stop ignoring pod directives
1140# until we see a corresponding cut.
54310121 1141#
1142sub process_pod {
1143 # no need to set $ignore to 0 cause the main loop did it
1144}
1145
1146#
2a28b791 1147# process_for - process a =for pod tag. if it's for html, spit
c4d9b39d 1148# it out verbatim, if illustration, center it, otherwise ignore it.
54310121 1149#
1150sub process_for {
1151 my($whom, $text) = @_;
1152 if ( $whom =~ /^(pod2)?html$/i) {
1153 print HTML $text;
c4d9b39d 1154 } elsif ($whom =~ /^illustration$/i) {
1155 1 while chomp $text;
1156 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1157 $text .= $ext, last if -r "$text$ext";
1158 }
1159 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1160 }
54310121 1161}
1162
1163#
1164# process_begin - process a =begin pod tag. this pushes
1165# whom we're beginning on the begin stack. if there's a
1166# begin stack, we only print if it us.
1167#
1168sub process_begin {
1169 my($whom, $text) = @_;
1170 $whom = lc($whom);
1171 push (@begin_stack, $whom);
1172 if ( $whom =~ /^(pod2)?html$/) {
1173 print HTML $text if $text;
1174 }
1175}
1176
1177#
1178# process_end - process a =end pod tag. pop the
1179# begin stack. die if we're mismatched.
1180#
1181sub process_end {
1182 my($whom, $text) = @_;
1183 $whom = lc($whom);
1184 if ($begin_stack[-1] ne $whom ) {
1185 die "Unmatched begin/end at chunk $paragraph\n"
1186 }
2a28b791 1187 pop( @begin_stack );
54310121 1188}
1189
1190#
2a28b791 1191# process_pre - indented paragraph, made into <PRE></PRE>
54310121 1192#
2a28b791 1193sub process_pre {
1194 my( $text ) = @_;
1195 my( $rest );
54310121 1196 return if $ignore;
1197
54310121 1198 $rest = $$text;
1199
2a28b791 1200 # insert spaces in place of tabs
1201 $rest =~ s#.*#
be173d55 1202 my $line = $&;
1203 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1204 $line;
1205 #eg;
54310121 1206
2a28b791 1207 # convert some special chars to HTML escapes
1208 $rest =~ s/&/&amp;/g;
1209 $rest =~ s/</&lt;/g;
1210 $rest =~ s/>/&gt;/g;
1211 $rest =~ s/"/&quot;/g;
1212
1213 # try and create links for all occurrences of perl.* within
1214 # the preformatted text.
1215 $rest =~ s{
1216 (\s*)(perl\w+)
1217 }{
1218 if ( defined $pages{$2} ){ # is a link
1219 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1220 } elsif (defined $pages{dosify($2)}) { # is a link
1221 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1222 } else {
1223 "$1$2";
1224 }
1225 }xeg;
1226 $rest =~ s{
1227 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1228 }{
1229 my $url ;
1230 if ( $htmlfileurl ne '' ){
1231 # Here, we take advantage of the knowledge
1232 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1233 # Since $htmlroot eq '', we need to prepend $htmldir
1234 # on the fron of the link to get the absolute path
1235 # of the link's target. We check for a leading '/'
1236 # to avoid corrupting links that are #, file:, etc.
1237 my $old_url = $3 ;
1238 $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1239 $url = relativize_url( "$old_url.html", $htmlfileurl );
1240 } else {
1241 $url = "$3.html" ;
1242 }
1243 "$1$url" ;
1244 }xeg;
1245
1246 # Look for embedded URLs and make them into links. We don't
1247 # relativize them since they are best left as the author intended.
1248
1249 my $urls = '(' . join ('|', qw{
54310121 1250 http
1251 telnet
1252 mailto
1253 news
1254 gopher
1255 file
1256 wais
1257 ftp
1258 } )
1259 . ')';
1260
2a28b791 1261 my $ltrs = '\w';
1262 my $gunk = '/#~:.?+=&%@!\-';
1263 my $punc = '.:?\-';
1264 my $any = "${ltrs}${gunk}${punc}";
54310121 1265
2a28b791 1266 $rest =~ s{
54310121 1267 \b # start at word boundary
1268 ( # begin $1 {
29f227c9 1269 $urls : # need resource and a colon
1270 (?!:) # Ignore File::, among others.
54310121 1271 [$any] +? # followed by on or more
1272 # of any valid character, but
1273 # be conservative and take only
1274 # what you need to....
1275 ) # end $1 }
1276 (?= # look-ahead non-consumptive assertion
1277 [$punc]* # either 0 or more puntuation
1278 [^$any] # followed by a non-url char
1279 | # or else
1280 $ # then end of the string
1281 )
1282 }{<A HREF="$1">$1</A>}igox;
1283
2a28b791 1284 # text should be as it is (verbatim)
1285 $$text = $rest;
1286}
54310121 1287
54310121 1288
2a28b791 1289#
1290# pure text processing
1291#
1292# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1293# we don't want this to happen within IS
1294#
1295sub pure_text($){
1296 my $text = shift();
1297 process_puretext( $text, \$ptQuote, 1 );
54310121 1298}
1299
2a28b791 1300sub inIS_text($){
1301 my $text = shift();
1302 process_puretext( $text, \$ptQuote, 0 );
1303}
54310121 1304
1305#
1306# process_puretext - process pure text (without pod-escapes) converting
1307# double-quotes and handling implicit C<> links.
1308#
1309sub process_puretext {
2a28b791 1310 my($text, $quote, $notinIS) = @_;
54310121 1311
2a28b791 1312 ## Guessing at func() or [$@%&]*var references in plain text is destined
1313 ## to produce some strange looking ref's. uncomment to disable:
1314 ## $notinIS = 0;
1315
1316 my(@words, $lead, $trail);
54310121 1317
2a28b791 1318 # convert double-quotes to single-quotes
1319 if( $$quote && $text =~ s/"/''/s ){
1320 $$quote = 0;
1321 }
1322 while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1323 $$quote = 1 if $text =~ s/"/``/s;
54310121 1324
1325 # keep track of leading and trailing white-space
2a28b791 1326 $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
1327 $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
54310121 1328
2a28b791 1329 # split at space/non-space boundaries
1330 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
54310121 1331
1332 # process each word individually
1333 foreach my $word (@words) {
2a28b791 1334 # skip space runs
1335 next if $word =~ /^\s*$/;
54310121 1336 # see if we can infer a link
2a28b791 1337 if( $notinIS && $word =~ s/^(\w+)\((.*)\)\W*$/$1/ ) {
54310121 1338 # has parenthesis so should have been a C<> ref
2a28b791 1339 ## try for a pagename (perlXXX(1))?
1340 if( $2 =~ /^\d+$/ ){
1341 my $url = page_sect( $word, '' );
1342 if( defined $url ){
1343 $word = "<A HREF=\"$url\">the $word manpage</A>";
1344 next;
1345 }
1346 }
1347 $word = emit_C( $word );
1348
1349#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1350## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1351## # perl variables, should be a C<> ref
1352## $word = emit_C( $word );
1353
54310121 1354 } elsif ($word =~ m,^\w+://\w,) {
1355 # looks like a URL
5a039dd3 1356 # Don't relativize it: leave it as the author intended
54310121 1357 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1358 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1359 # looks like an e-mail address
7b8d334a 1360 my ($w1, $w2, $w3) = ("", $word, "");
1361 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1362 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1363 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1364 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1365 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1366 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1367 } else {
7b8d334a 1368 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1369 }
1370 }
1371
2a28b791 1372 # put everything back together
1373 return $lead . join( '', @words ) . $trail;
1374}
1375
54310121 1376
2a28b791 1377#
1378# process_text - handles plaintext that appears in the input pod file.
1379# there may be pod commands embedded within the text so those must be
1380# converted to html commands.
1381#
7ba65c74 1382
1383sub process_text1($$;$);
1384
2a28b791 1385sub process_text {
1386 return if $ignore;
1387 my( $tref ) = @_;
1388 my $res = process_text1( 0, $tref );
1389 $$tref = $res;
1390}
1391
1392sub process_text1($$;$){
1393 my( $lev, $rstr, $func ) = @_;
1394 $lev++ unless defined $func;
1395 my $res = '';
1396
228a48a5 1397 $func ||= '';
2a28b791 1398 if( $func eq 'B' ){
1399 # B<text> - boldface
1400 $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
1401
1402 } elsif( $func eq 'C' ){
1403 # C<code> - can be a ref or <CODE></CODE>
1404 # need to extract text
1405 my $par = go_ahead( $rstr, 'C' );
1406
1407 ## clean-up of the link target
1408 my $text = depod( $par );
1409
1410 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1411 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1412
1413 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1414
1415 } elsif( $func eq 'E' ){
1416 # E<x> - convert to character
1417 $$rstr =~ s/^(\w+)>//;
1418 $res = "&$1;";
1419
1420 } elsif( $func eq 'F' ){
1421 # F<filename> - italizice
1422 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1423
1424 } elsif( $func eq 'I' ){
1425 # I<text> - italizice
1426 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1427
1428 } elsif( $func eq 'L' ){
1429 # L<link> - link
1430 ## L<text|cross-ref> => produce text, use cross-ref for linking
1431 ## L<cross-ref> => make text from cross-ref
1432 ## need to extract text
1433 my $par = go_ahead( $rstr, 'L' );
1434
1435 # some L<>'s that shouldn't be:
1436 # a) full-blown URL's are emitted as-is
1437 if( $par =~ m{^\w+://}s ){
1438 return make_URL_href( $par );
1439 }
1440 # b) C<...> is stripped and treated as C<>
1441 if( $par =~ /^C<(.*)>$/ ){
1442 my $text = depod( $1 );
1443 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1444 }
1445
1446 # analyze the contents
1447 $par =~ s/\n/ /g; # undo word-wrapped tags
1448 my $opar = $par;
1449 my $linktext;
1450 if( $par =~ s{^([^|]+)\|}{} ){
1451 $linktext = $1;
1452 }
1453
1454 # make sure sections start with a /
1455 $par =~ s{^"}{/"};
1456
1457 my( $page, $section, $ident );
1458
1459 # check for link patterns
1460 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
1461 # we've got a name/ident (no quotes)
1462 ( $page, $ident ) = ( $1, $2 );
1463 ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1464
1465 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1466 # even though this should be a "section", we go for ident first
1467 ( $page, $ident ) = ( $1, $2 );
1468 ### print STDERR "--> L<$par> to page $page, section $section\n";
1469
1470 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
1471 ( $page, $section ) = ( '', $par );
1472 ### print STDERR "--> L<$par> to void page, section $section\n";
1473
1474 } else {
1475 ( $page, $section ) = ( $par, '' );
1476 ### print STDERR "--> L<$par> to page $par, void section\n";
1477 }
1478
1479 # now, either $section or $ident is defined. the convoluted logic
1480 # below tries to resolve L<> according to what the user specified.
1481 # failing this, we try to find the next best thing...
1482 my( $url, $ltext, $fid );
1483
1484 RESOLVE: {
1485 if( defined $ident ){
1486 ## try to resolve $ident as an item
1487 ( $url, $fid ) = coderef( $page, $ident );
1488 if( $url ){
1489 if( ! defined( $linktext ) ){
1490 $linktext = $ident;
1491 $linktext .= " in " if $ident && $page;
1492 $linktext .= "the $page manpage" if $page;
1493 }
1494 ### print STDERR "got coderef url=$url\n";
1495 last RESOLVE;
1496 }
1497 ## no luck: go for a section (auto-quoting!)
1498 $section = $ident;
1499 }
1500 ## now go for a section
1501 my $htmlsection = htmlify( $section );
1502 $url = page_sect( $page, $htmlsection );
1503 if( $url ){
1504 if( ! defined( $linktext ) ){
1505 $linktext = $section;
1506 $linktext .= " in " if $section && $page;
1507 $linktext .= "the $page manpage" if $page;
1508 }
1509 ### print STDERR "got page/section url=$url\n";
1510 last RESOLVE;
1511 }
1512 ## no luck: go for an ident
1513 if( $section ){
1514 $ident = $section;
1515 } else {
1516 $ident = $page;
1517 $page = undef();
1518 }
1519 ( $url, $fid ) = coderef( $page, $ident );
1520 if( $url ){
1521 if( ! defined( $linktext ) ){
1522 $linktext = $ident;
1523 $linktext .= " in " if $ident && $page;
1524 $linktext .= "the $page manpage" if $page;
1525 }
1526 ### print STDERR "got section=>coderef url=$url\n";
1527 last RESOLVE;
1528 }
1529
1530 # warning; show some text.
1531 $linktext = $opar unless defined $linktext;
1532 warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1533 }
1534
1535 # now we have an URL or just plain code
1536 $$rstr = $linktext . '>' . $$rstr;
1537 if( defined( $url ) ){
1538 $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
1539 } else {
1540 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1541 }
1542
1543 } elsif( $func eq 'S' ){
1544 # S<text> - non-breaking spaces
1545 $res = process_text1( $lev, $rstr );
1546 $res =~ s/ /&nbsp;/g;
1547
1548 } elsif( $func eq 'X' ){
1549 # X<> - ignore
1550 $$rstr =~ s/^[^>]*>//;
1551
1552 } elsif( $func eq 'Z' ){
1553 # Z<> - empty
1554 warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1555 unless $$rstr =~ s/^>//;
1556
1557 } else {
1558 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
1559 # all others: either recurse into new function or
1560 # terminate at closing angle bracket
1561 my $pt = $1;
1562 $pt .= '>' if $2 eq '>' && $lev == 1;
1563 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1564 return $res if $2 eq '>' && $lev > 1;
1565 if( $2 ne '>' ){
1566 $res .= process_text1( $lev, $rstr, substr($2,0,1) );
1567 }
1568
1569 }
1570 if( $lev == 1 ){
1571 $res .= pure_text( $$rstr );
54310121 1572 } else {
2a28b791 1573 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
54310121 1574 }
1575 }
2a28b791 1576 return $res;
1577}
54310121 1578
2a28b791 1579#
1580# go_ahead: extract text of an IS (can be nested)
1581#
1582sub go_ahead($$){
1583 my( $rstr, $func ) = @_;
1584 my $res = '';
1585 my $level = 1;
1586 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
1587 $res .= $1;
1588 if( $2 eq '>' ){
1589 return $res if --$level == 0;
1590 } else {
1591 ++$level;
1592 }
1593 $res .= $2;
1594 }
1595 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1596 return $res;
54310121 1597}
1598
1599#
2a28b791 1600# emit_C - output result of C<text>
1601# $text is the depod-ed text
54310121 1602#
2a28b791 1603sub emit_C($;$){
1604 my( $text, $nocode ) = @_;
1605 my $res;
1606 my( $url, $fid ) = coderef( undef(), $text );
1607
1608 # need HTML-safe text
1609 my $linktext = html_escape( $text );
1610
1611 if( defined( $url ) &&
1612 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1613 $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>";
1614 } elsif( 0 && $nocode ){
1615 $res = $linktext;
1616 } else {
1617 $res = "<CODE>$linktext</CODE>";
1618 }
1619 return $res;
54310121 1620}
1621
1622#
2a28b791 1623# html_escape: make text safe for HTML
1624#
1625sub html_escape {
1626 my $rest = $_[0];
1627 $rest =~ s/&/&amp;/g;
1628 $rest =~ s/</&lt;/g;
1629 $rest =~ s/>/&gt;/g;
1630 $rest =~ s/"/&quot;/g;
1631 return $rest;
1632}
1633
1634
1635#
39e571d4 1636# dosify - convert filenames to 8.3
1637#
1638sub dosify {
1639 my($str) = @_;
fe4c6be1 1640 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4 1641 if ($Is83) {
1642 $str = lc $str;
1643 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1644 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1645 }
1646 return $str;
1647}
1648
1649#
2a28b791 1650# page_sect - make an URL from the text of a L<>
54310121 1651#
2a28b791 1652sub page_sect($$) {
1653 my( $page, $section ) = @_;
1654 my( $linktext, $page83, $link); # work strings
1655
1656 # check if we know that this is a section in this page
1657 if (!defined $pages{$page} && defined $sections{$page}) {
1658 $section = $page;
1659 $page = "";
1660 ### print STDERR "reset page='', section=$section\n";
54310121 1661 }
1662
39e571d4 1663 $page83=dosify($page);
1664 $page=$page83 if (defined $pages{$page83});
54310121 1665 if ($page eq "") {
2a28b791 1666 $link = "#" . htmlify( $section );
350ccacd 1667 } elsif ( $page =~ /::/ ) {
350ccacd 1668 $page =~ s,::,/,g;
29f227c9 1669 # Search page cache for an entry keyed under the html page name,
1670 # then look to see what directory that page might be in. NOTE:
1671 # this will only find one page. A better solution might be to produce
1672 # an intermediate page that is an index to all such pages.
1673 my $page_name = $page ;
1674 $page_name =~ s,^.*/,, ;
1675 if ( defined( $pages{ $page_name } ) &&
1676 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1677 ) {
1678 $page = $1 ;
1679 }
1680 else {
1681 # NOTE: This branch assumes that all A::B pages are located in
1682 # $htmlroot/A/B.html . This is often incorrect, since they are
1683 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1684 # analyze the contents of %pages and figure out where any
1685 # cousins of A::B are, then assume that. So, if A::B isn't found,
1686 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1687 # lib/A/B.pm. This is also limited, but it's an improvement.
1688 # Maybe a hints file so that the links point to the correct places
2a28b791 1689 # nonetheless?
1690
29f227c9 1691 }
350ccacd 1692 $link = "$htmlroot/$page.html";
2a28b791 1693 $link .= "#" . htmlify( $section ) if ($section);
54310121 1694 } elsif (!defined $pages{$page}) {
54310121 1695 $link = "";
54310121 1696 } else {
2a28b791 1697 $section = htmlify( $section ) if $section ne "";
1698 ### print STDERR "...section=$section\n";
54310121 1699
1700 # if there is a directory by the name of the page, then assume that an
1701 # appropriate section will exist in the subdirectory
29f227c9 1702# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1703 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 1704 $link = "$htmlroot/$1/$section.html";
2a28b791 1705 ### print STDERR "...link=$link\n";
54310121 1706
1707 # since there is no directory by the name of the page, the section will
1708 # have to exist within a .html of the same name. thus, make sure there
1709 # is a .pod or .pm that might become that .html
1710 } else {
2a28b791 1711 $section = "#$section" if $section;
1712 ### print STDERR "...section=$section\n";
1713
54310121 1714 # check if there is a .pod with the page name
1715 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1716 $link = "$htmlroot/$1.html$section";
1717 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1718 $link = "$htmlroot/$1.html$section";
1719 } else {
54310121 1720 $link = "";
54310121 1721 }
1722 }
1723 }
1724
54310121 1725 if ($link) {
29f227c9 1726 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1727 # implies $htmlroot eq ''. This means that the link in question
1728 # needs a prefix of $htmldir if it begins with '/'. The test for
1729 # the initial '/' is done to avoid '#'-only links, and to allow
1730 # for other kinds of links, like file:, ftp:, etc.
1731 my $url ;
1732 if ( $htmlfileurl ne '' ) {
2a28b791 1733 $link = "$htmldir$link" if $link =~ m{^/};
1734 $url = relativize_url( $link, $htmlfileurl );
1735# print( " b: [$link,$htmlfileurl,$url]\n" );
29f227c9 1736 }
1737 else {
1738 $url = $link ;
1739 }
2a28b791 1740 return $url;
29f227c9 1741
54310121 1742 } else {
2a28b791 1743 return undef();
54310121 1744 }
54310121 1745}
1746
1747#
29f227c9 1748# relativize_url - convert an absolute URL to one relative to a base URL.
1749# Assumes both end in a filename.
1750#
1751sub relativize_url {
1752 my ($dest,$source) = @_ ;
1753
1754 my ($dest_volume,$dest_directory,$dest_file) =
1755 File::Spec::Unix->splitpath( $dest ) ;
1756 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1757
1758 my ($source_volume,$source_directory,$source_file) =
1759 File::Spec::Unix->splitpath( $source ) ;
1760 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1761
1762 my $rel_path = '' ;
1763 if ( $dest ne '' ) {
1764 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1765 }
1766
1767 if ( $rel_path ne '' &&
1768 substr( $rel_path, -1 ) ne '/' &&
1769 substr( $dest_file, 0, 1 ) ne '#'
1770 ) {
1771 $rel_path .= "/$dest_file" ;
1772 }
1773 else {
1774 $rel_path .= "$dest_file" ;
1775 }
1776
1777 return $rel_path ;
1778}
1779
54310121 1780
1781#
2a28b791 1782# coderef - make URL from the text of a C<>
54310121 1783#
2a28b791 1784sub coderef($$){
1785 my( $page, $item ) = @_;
1786 my( $url );
1787
1788 my $fid = fragment_id( $item );
228a48a5 1789 return( $url, $fid );
2a28b791 1790 if( defined( $page ) ){
1791 # we have been given a $page...
1792 $page =~ s{::}{/}g;
1793
1794 # Do we take it? Item could be a section!
228a48a5 1795 my $base = $items{$fid} || "";
2a28b791 1796 $base =~ s{[^/]*/}{};
1797 if( $base ne "$page.html" ){
1798 ### print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1799 $page = undef();
1800 }
54310121 1801
2a28b791 1802 } else {
1803 # no page - local items precede cached items
1804 if( exists $local_items{$fid} ){
1805 $page = $local_items{$fid};
1806 } else {
1807 $page = $items{$fid};
1808 }
1809 }
54310121 1810
1811 # if there was a pod file that we found earlier with an appropriate
1812 # =item directive, then create a link to that page.
2a28b791 1813 if( defined $page ){
1814 if( $page ){
228a48a5 1815 if( exists $pages{$page} and $pages{$page} =~ /([^:.]*)\.[^:]*:/){
2a28b791 1816 $page = $1 . '.html';
29f227c9 1817 }
2a28b791 1818 my $link = "$htmlroot/$page#item_$fid";
54310121 1819
2a28b791 1820 # Here, we take advantage of the knowledge that $htmlfileurl
1821 # ne '' implies $htmlroot eq ''.
1822 if ( $htmlfileurl ne '' ) {
1823 $link = "$htmldir$link" ;
1824 $url = relativize_url( $link, $htmlfileurl ) ;
1825 } else {
1826 $url = $link ;
1827 }
1828 } else {
1829 $url = "#item_" . $fid;
1830 }
54310121 1831
2a28b791 1832 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1833 }
1834 return( $url, $fid );
54310121 1835}
1836
54310121 1837
1838
1839#
29f227c9 1840# Adapted from Nick Ing-Simmons' PodToHtml package.
1841sub relative_url {
1842 my $source_file = shift ;
1843 my $destination_file = shift;
1844
1845 my $source = URI::file->new_abs($source_file);
1846 my $uo = URI::file->new($destination_file,$source)->abs;
1847 return $uo->rel->as_string;
1848}
1849
1850
1851#
54310121 1852# finish_list - finish off any pending HTML lists. this should be called
1853# after the entire pod file has been read and converted.
1854#
1855sub finish_list {
7b8d334a 1856 while ($listlevel > 0) {
54310121 1857 print HTML "</DL>\n";
1858 $listlevel--;
1859 }
1860}
1861
1862#
1863# htmlify - converts a pod section specification to a suitable section
2a28b791 1864# specification for HTML. Note that we keep spaces and special characters
1865# except ", ? (Netscape problem) and the hyphen (writer's problem...).
54310121 1866#
1867sub htmlify {
2a28b791 1868 my( $heading) = @_;
1869 $heading =~ s/(\s+)/ /g;
1870 $heading =~ s/\s+\Z//;
1871 $heading =~ s/\A\s+//;
1872 # The hyphen is a disgrace to the English language.
1873 $heading =~ s/[-"?]//g;
1874 $heading = lc( $heading );
1875 return $heading;
1876}
54310121 1877
2a28b791 1878#
1879# depod - convert text by eliminating all interior sequences
1880# Note: can be called with copy or modify semantics
1881#
1882my %E2c;
1883$E2c{lt} = '<';
1884$E2c{gt} = '>';
1885$E2c{sol} = '/';
1886$E2c{verbar} = '|';
1887
7ba65c74 1888sub depod1($;$);
1889
2a28b791 1890sub depod($){
1891 my $string;
1892 if( ref( $_[0] ) ){
1893 $string = ${$_[0]};
1894 ${$_[0]} = depod1( \$string );
1895 } else {
1896 $string = $_[0];
1897 depod1( \$string );
1898 }
1899}
54310121 1900
2a28b791 1901sub depod1($;$){
1902 my( $rstr, $func ) = @_;
1903 my $res = '';
228a48a5 1904 return $res unless defined $$rstr;
2a28b791 1905 if( ! defined( $func ) ){
1906 # skip to next begin of an interior sequence
1907 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<// ){
1908 # recurse into its text
1909 $res .= $1 . depod1( $rstr, $2 );
1910 }
1911 $res .= $$rstr;
1912 } elsif( $func eq 'E' ){
1913 # E<x> - convert to character
1914 $$rstr =~ s/^(\w+)>//;
228a48a5 1915 $res .= $E2c{$1} || "";
2a28b791 1916 } elsif( $func eq 'X' ){
1917 # X<> - ignore
1918 $$rstr =~ s/^[^>]*>//;
1919 } elsif( $func eq 'Z' ){
1920 # Z<> - empty
1921 $$rstr =~ s/^>//;
1922 } else {
1923 # all others: either recurse into new function or
1924 # terminate at closing angle bracket
1925 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)// ){
1926 $res .= $1;
1927 last if $2 eq '>';
1928 $res .= depod1( $rstr, substr($2,0,1) );
1929 }
1930 ## If we're here and $2 ne '>': undelimited interior sequence.
1931 ## Ignored, as this is called without proper indication of where we are.
1932 ## Rely on process_text to produce diagnostics.
1933 }
1934 return $res;
1935}
54310121 1936
2a28b791 1937#
1938# fragment_id - construct a fragment identifier from:
1939# a) =item text
1940# b) contents of C<...>
1941#
1942my @hc;
1943sub fragment_id {
1944 my $text = shift();
1945 $text =~ s/\s+\Z//s;
1946 if( $text ){
1947 # a method or function?
1948 return $1 if $text =~ /(\w+)\s*\(/;
1949 return $1 if $text =~ /->\s*(\w+)\s*\(?/;
1950
1951 # a variable name?
1952 return $1 if $text =~ /^([$@%*]\S+)/;
1953
1954 # some pattern matching operator?
1955 return $1 if $text =~ m|^(\w+/).*/\w*$|;
1956
1957 # fancy stuff... like "do { }"
1958 return $1 if $text =~ m|^(\w+)\s*{.*}$|;
1959
1960 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
1961 # and some funnies with ... Module ...
1962 return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
1963 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
1964
1965 # text? normalize!
1966 $text =~ s/\s+/_/sg;
1967 $text =~ s{(\W)}{
1968 defined( $hc[ord($1)] ) ? $hc[ord($1)]
1969 : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
1970 $text = substr( $text, 0, 50 );
1971 } else {
1972 return undef();
1973 }
54310121 1974}
1975
2a28b791 1976#
1977# make_URL_href - generate HTML href from URL
1978# Special treatment for CGI queries.
1979#
1980sub make_URL_href($){
1981 my( $url ) = @_;
1982 if( $url !~
228a48a5 1983 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<A HREF="$1$2">$1</A>}i ){
2a28b791 1984 $url = "<A HREF=\"$url\">$url</A>";
1985 }
1986 return $url;
54310121 1987}
1988
19891;