fix diagnostics to report "our" vs "my" correctly
[p5sagit/p5-mst-13.2.git] / lib / Pod / Html.pm
CommitLineData
54310121 1package Pod::Html;
2
3use Pod::Functions;
4use Getopt::Long; # package for handling command-line parameters
29f227c9 5use File::Spec::Unix;
54310121 6require Exporter;
7b8d334a 7use vars qw($VERSION);
2a28b791 8$VERSION = 1.03;
54310121 9@ISA = Exporter;
10@EXPORT = qw(pod2html htmlify);
11use Cwd;
12
13use Carp;
14
3ec07288 15use locale; # make \w work right in non-ASCII lands
16
54310121 17use strict;
18
7b8d334a 19use Config;
20
54310121 21=head1 NAME
22
7b8d334a 23Pod::Html - module to convert pod files to HTML
54310121 24
25=head1 SYNOPSIS
26
7b8d334a 27 use Pod::Html;
54310121 28 pod2html([options]);
29
30=head1 DESCRIPTION
31
32Converts files from pod format (see L<perlpod>) to HTML format. It
33can automatically generate indexes and cross-references, and it keeps
34a cache of things it knows how to cross-reference.
35
36=head1 ARGUMENTS
37
38Pod::Html takes the following arguments:
39
40=over 4
41
42=item help
43
44 --help
45
46Displays the usage message.
47
5a039dd3 48=item htmldir
49
50 --htmldir=name
51
52Sets the directory in which the resulting HTML file is placed. This
29f227c9 53is used to generate relative links to other files. Not passing this
54causes all links to be absolute, since this is the value that tells
55Pod::Html the root of the documentation tree.
5a039dd3 56
54310121 57=item htmlroot
58
59 --htmlroot=name
60
61Sets the base URL for the HTML files. When cross-references are made,
62the HTML root is prepended to the URL.
63
64=item infile
65
66 --infile=name
67
68Specify the pod file to convert. Input is taken from STDIN if no
69infile is specified.
70
71=item outfile
72
73 --outfile=name
74
75Specify the HTML file to create. Output goes to STDOUT if no outfile
76is specified.
77
78=item podroot
79
80 --podroot=name
81
82Specify the base directory for finding library pods.
83
84=item podpath
85
86 --podpath=name:...:name
87
88Specify which subdirectories of the podroot contain pod files whose
89HTML converted forms can be linked-to in cross-references.
90
91=item libpods
92
93 --libpods=name:...:name
94
95List of page names (eg, "perlfunc") which contain linkable C<=item>s.
96
97=item netscape
98
99 --netscape
100
101Use Netscape HTML directives when applicable.
102
103=item nonetscape
104
105 --nonetscape
106
107Do not use Netscape HTML directives (default).
108
109=item index
110
111 --index
112
113Generate an index at the top of the HTML file (default behaviour).
114
115=item noindex
116
117 --noindex
118
119Do not generate an index at the top of the HTML file.
120
121
122=item recurse
123
124 --recurse
125
126Recurse into subdirectories specified in podpath (default behaviour).
127
128=item norecurse
129
130 --norecurse
131
132Do not recurse into subdirectories specified in podpath.
133
134=item title
135
136 --title=title
137
138Specify the title of the resulting HTML file.
139
34db337b 140=item css
141
142 --css=stylesheet
143
144Specify the URL of a cascading style sheet.
145
54310121 146=item verbose
147
148 --verbose
149
150Display progress messages.
151
34db337b 152=item quiet
153
154 --quiet
155
156Don't display I<mostly harmless> warning messages.
157
54310121 158=back
159
160=head1 EXAMPLE
161
162 pod2html("pod2html",
163 "--podpath=lib:ext:pod:vms",
164 "--podroot=/usr/src/perl",
165 "--htmlroot=/perl/nmanual",
166 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
167 "--recurse",
168 "--infile=foo.pod",
169 "--outfile=/perl/nmanual/foo.html");
170
34db337b 171=head1 ENVIRONMENT
172
173Uses $Config{pod2html} to setup default options.
174
54310121 175=head1 AUTHOR
176
177Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
178
54310121 179=head1 SEE ALSO
180
181L<perlpod>
182
183=head1 COPYRIGHT
184
185This program is distributed under the Artistic License.
186
187=cut
188
1f763251 189my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
190my $dircache = "pod2htmd$cache_ext";
191my $itemcache = "pod2htmi$cache_ext";
54310121 192
193my @begin_stack = (); # begin/end stack
194
29f227c9 195my @libpods = (); # files to search for links from C<> directives
196my $htmlroot = "/"; # http-server base directory from which all
54310121 197 # relative paths in $podpath stem.
5a039dd3 198my $htmldir = ""; # The directory to which the html pages
199 # will (eventually) be written.
54310121 200my $htmlfile = ""; # write to stdout by default
29f227c9 201my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3 202 # refer to this file. This is only used
203 # to make relative urls that point to
204 # other files.
54310121 205my $podfile = ""; # read from stdin by default
206my @podpath = (); # list of directories containing library pods.
207my $podroot = "."; # filesystem base directory from which all
208 # relative paths in $podpath stem.
34db337b 209my $css = ''; # Cascading style sheet
54310121 210my $recurse = 1; # recurse on subdirectories in $podpath.
34db337b 211my $quiet = 0; # not quiet by default
54310121 212my $verbose = 0; # not verbose by default
213my $doindex = 1; # non-zero if we should generate an index
214my $listlevel = 0; # current list depth
2a28b791 215my @listend = (); # the text to use to end the list.
216my $after_lpar = 0; # set to true after a par in an =item
54310121 217my $ignore = 1; # whether or not to format text. we don't
218 # format text until we hit our first pod
219 # directive.
220
221my %items_named = (); # for the multiples of the same item in perlfunc
222my @items_seen = ();
223my $netscape = 0; # whether or not to use netscape directives.
224my $title; # title to give the pod(s)
34db337b 225my $header = 0; # produce block header/footer
54310121 226my $top = 1; # true if we are at the top of the doc. used
227 # to prevent the first <HR> directive.
228my $paragraph; # which paragraph we're processing (used
229 # for error messages)
2a28b791 230my $ptQuote = 0; # status of double-quote conversion
54310121 231my %pages = (); # associative array used to find the location
232 # of pages referenced by L<> links.
233my %sections = (); # sections within this page
234my %items = (); # associative array used to find the location
235 # of =item directives referenced by C<> links
2a28b791 236my %local_items = (); # local items - avoid destruction of %items
39e571d4 237my $Is83; # is dos with short filenames (8.3)
238
54310121 239sub init_globals {
04f720db 240$dircache = "pod2htmd$cache_ext";
241$itemcache = "pod2htmi$cache_ext";
54310121 242
243@begin_stack = (); # begin/end stack
244
245@libpods = (); # files to search for links from C<> directives
246$htmlroot = "/"; # http-server base directory from which all
247 # relative paths in $podpath stem.
248$htmlfile = ""; # write to stdout by default
249$podfile = ""; # read from stdin by default
250@podpath = (); # list of directories containing library pods.
251$podroot = "."; # filesystem base directory from which all
252 # relative paths in $podpath stem.
34db337b 253$css = ''; # Cascading style sheet
54310121 254$recurse = 1; # recurse on subdirectories in $podpath.
34db337b 255$quiet = 0; # not quiet by default
54310121 256$verbose = 0; # not verbose by default
257$doindex = 1; # non-zero if we should generate an index
258$listlevel = 0; # current list depth
2a28b791 259@listend = (); # the text to use to end the list.
260$after_lpar = 0; # set to true after a par in an =item
54310121 261$ignore = 1; # whether or not to format text. we don't
262 # format text until we hit our first pod
263 # directive.
264
265@items_seen = ();
266%items_named = ();
267$netscape = 0; # whether or not to use netscape directives.
34db337b 268$header = 0; # produce block header/footer
54310121 269$title = ''; # title to give the pod(s)
270$top = 1; # true if we are at the top of the doc. used
271 # to prevent the first <HR> directive.
272$paragraph = ''; # which paragraph we're processing (used
273 # for error messages)
54310121 274%sections = (); # sections within this page
3e3baf6d 275
276# These are not reinitialised here but are kept as a cache.
277# See get_cache and related cache management code.
278#%pages = (); # associative array used to find the location
279 # of pages referenced by L<> links.
280#%items = (); # associative array used to find the location
54310121 281 # of =item directives referenced by C<> links
2a28b791 282%local_items = ();
39e571d4 283$Is83=$^O eq 'dos';
54310121 284}
285
2a28b791 286#
287# clean_data: global clean-up of pod data
288#
289sub clean_data($){
290 my( $dataref ) = @_;
291 my $i;
292 for( $i = 0; $i <= $#$dataref; $i++ ){
293 ${$dataref}[$i] =~ s/\s+\Z//;
294
295 # have a look for all-space lines
296 if( ${$dataref}[$i] =~ /^\s+$/m ){
297 my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
298 splice( @$dataref, $i, 1, @chunks );
299 }
300 }
301}
302
303
54310121 304sub pod2html {
305 local(@ARGV) = @_;
306 local($/);
307 local $_;
308
309 init_globals();
310
39e571d4 311 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
312
54310121 313 # cache of %pages and %items from last time we ran pod2html
54310121 314
315 #undef $opt_help if defined $opt_help;
316
317 # parse the command-line parameters
318 parse_command_line();
319
320 # set some variables to their default values if necessary
321 local *POD;
322 unless (@ARGV && $ARGV[0]) {
323 $podfile = "-" unless $podfile; # stdin
324 open(POD, "<$podfile")
325 || die "$0: cannot open $podfile file for input: $!\n";
326 } else {
327 $podfile = $ARGV[0]; # XXX: might be more filenames
328 *POD = *ARGV;
329 }
330 $htmlfile = "-" unless $htmlfile; # stdout
331 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
29f227c9 332 $htmldir =~ s#/$## ; # so we don't get a //
333 if ( $htmlroot eq ''
334 && defined( $htmldir )
335 && $htmldir ne ''
336 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
337 )
5a039dd3 338 {
29f227c9 339 # Set the 'base' url for this file, so that we can use it
340 # as the location from which to calculate relative links
341 # to other files. If this is '', then absolute links will
342 # be used throughout.
343 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 344 }
54310121 345
346 # read the pod a paragraph at a time
347 warn "Scanning for sections in input file(s)\n" if $verbose;
348 $/ = "";
349 my @poddata = <POD>;
350 close(POD);
2a28b791 351 clean_data( \@poddata );
54310121 352
353 # scan the pod for =head[1-6] directives and build an index
354 my $index = scan_headings(\%sections, @poddata);
355
3e3baf6d 356 unless($index) {
31e56455 357 warn "No headings in $podfile\n" if $verbose;
3e3baf6d 358 }
359
54310121 360 # open the output file
361 open(HTML, ">$htmlfile")
362 || die "$0: cannot open $htmlfile file for output: $!\n";
363
d011ffae 364 # put a title in the HTML file if one wasn't specified
365 if ($title eq '') {
366 TITLE_SEARCH: {
367 for (my $i = 0; $i < @poddata; $i++) {
368 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
369 for my $para ( @poddata[$i, $i+1] ) {
370 last TITLE_SEARCH
371 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
372 }
373 }
54310121 374
d011ffae 375 }
376 }
377 }
3e3baf6d 378 if (!$title and $podfile =~ /\.pod$/) {
379 # probably a split pod so take first =head[12] as title
380 for (my $i = 0; $i < @poddata; $i++) {
381 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
382 }
383 warn "adopted '$title' as title for $podfile\n"
384 if $verbose and $title;
385 }
7b8d334a 386 if ($title) {
387 $title =~ s/\s*\(.*\)//;
388 } else {
34db337b 389 warn "$0: no title for $podfile" unless $quiet;
54310121 390 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
391 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 392 warn "using $title" if $verbose;
54310121 393 }
34db337b 394 my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
395 $csslink =~ s,\\,/,g;
396 $csslink =~ s,(/.):,$1|,;
397
398 my $block = $header ? <<END_OF_BLOCK : '';
399<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
400<TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
401<FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
402</TD></TR>
403</TABLE>
404END_OF_BLOCK
405
54310121 406 print HTML <<END_OF_HEAD;
7b8d334a 407<HTML>
408<HEAD>
34db337b 409<TITLE>$title</TITLE>$csslink
7b8d334a 410<LINK REV="made" HREF="mailto:$Config{perladmin}">
411</HEAD>
54310121 412
7b8d334a 413<BODY>
34db337b 414$block
54310121 415END_OF_HEAD
416
3e3baf6d 417 # load/reload/validate/cache %pages and %items
418 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121 419
420 # scan the pod for =item directives
2a28b791 421 scan_items( \%local_items, "", @poddata);
54310121 422
423 # put an index at the top of the file. note, if $doindex is 0 we
424 # still generate an index, but surround it with an html comment.
425 # that way some other program can extract it if desired.
426 $index =~ s/--+/-/g;
2a28b791 427 print HTML "<A NAME=\"__index__\"></A>\n";
54310121 428 print HTML "<!-- INDEX BEGIN -->\n";
429 print HTML "<!--\n" unless $doindex;
430 print HTML $index;
431 print HTML "-->\n" unless $doindex;
432 print HTML "<!-- INDEX END -->\n\n";
31e56455 433 print HTML "<HR>\n" if $doindex and $index;
54310121 434
435 # now convert this file
2a28b791 436 my $after_item; # set to true after an =item
437 warn "Converting input file $podfile\n" if $verbose;
438 foreach my $i (0..$#poddata){
439 $ptQuote = 0; # status of quote conversion
440
54310121 441 $_ = $poddata[$i];
442 $paragraph = $i+1;
443 if (/^(=.*)/s) { # is it a pod directive?
444 $ignore = 0;
2a28b791 445 $after_item = 0;
54310121 446 $_ = $1;
447 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
448 process_begin($1, $2);
449 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
450 process_end($1, $2);
451 } elsif (/^=cut/) { # =cut
452 process_cut();
453 } elsif (/^=pod/) { # =pod
454 process_pod();
455 } else {
456 next if @begin_stack && $begin_stack[-1] ne 'html';
457
7b8d334a 458 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
2a28b791 459 process_head( $1, $2, $doindex && $index );
460 } elsif (/^=item\s*(.*\S)?/sm) { # =item text
461 warn "$0: $podfile: =item without bullet, number or text"
462 . " in paragraph $paragraph.\n" if $1 eq '';
463 process_item( $1 );
464 $after_item = 1;
54310121 465 } elsif (/^=over\s*(.*)/) { # =over N
466 process_over();
467 } elsif (/^=back/) { # =back
468 process_back();
469 } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
470 process_for($1,$2);
471 } else {
472 /^=(\S*)\s*/;
473 warn "$0: $podfile: unknown pod directive '$1' in "
474 . "paragraph $paragraph. ignoring.\n";
475 }
476 }
477 $top = 0;
478 }
479 else {
480 next if $ignore;
481 next if @begin_stack && $begin_stack[-1] ne 'html';
482 my $text = $_;
2a28b791 483 if( $text =~ /\A\s+/ ){
484 process_pre( \$text );
485 print HTML "<PRE>\n$text</PRE>\n";
486
487 } else {
488 process_text( \$text );
489
490 # experimental: check for a paragraph where all lines
491 # have some ...\t...\t...\n pattern
492 if( $text =~ /\t/ ){
493 my @lines = split( "\n", $text );
494 if( @lines > 1 ){
495 my $all = 2;
496 foreach my $line ( @lines ){
497 if( $line =~ /\S/ && $line !~ /\t/ ){
498 $all--;
499 last if $all == 0;
500 }
501 }
502 if( $all > 0 ){
503 $text =~ s/\t+/<TD>/g;
504 $text =~ s/^/<TR><TD>/gm;
505 $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
506 $text . '</TABLE>';
507 }
508 }
509 }
510 ## end of experimental
511
512 if( $after_item ){
513 print HTML "$text\n";
514 $after_lpar = 1;
515 } else {
516 print HTML "<P>$text</P>\n";
517 }
518 }
519 $after_item = 0;
54310121 520 }
521 }
522
523 # finish off any pending directives
524 finish_list();
2a28b791 525
526 # link to page index
527 print HTML "<P><A HREF=\"#__index__\"><SMALL>page index</SMALL></A></P>\n"
528 if $doindex and $index;
529
54310121 530 print HTML <<END_OF_TAIL;
34db337b 531$block
7b8d334a 532</BODY>
54310121 533
7b8d334a 534</HTML>
54310121 535END_OF_TAIL
536
537 # close the html file
538 close(HTML);
539
540 warn "Finished\n" if $verbose;
541}
542
543##############################################################################
544
545my $usage; # see below
546sub usage {
547 my $podfile = shift;
548 warn "$0: $podfile: @_\n" if @_;
549 die $usage;
550}
551
552$usage =<<END_OF_USAGE;
553Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
554 --podpath=<name>:...:<name> --podroot=<name>
555 --libpods=<name>:...:<name> --recurse --verbose --index
556 --netscape --norecurse --noindex
557
558 --flush - flushes the item and directory caches.
559 --help - prints this message.
560 --htmlroot - http-server base directory from which all relative paths
561 in podpath stem (default is /).
562 --index - generate an index at the top of the resulting html
563 (default).
564 --infile - filename for the pod to convert (input taken from stdin
565 by default).
566 --libpods - colon-separated list of pages to search for =item pod
567 directives in as targets of C<> and implicit links (empty
568 by default). note, these are not filenames, but rather
569 page names like those that appear in L<> links.
570 --netscape - will use netscape html directives when applicable.
571 --nonetscape - will not use netscape directives (default).
572 --outfile - filename for the resulting html file (output sent to
573 stdout by default).
574 --podpath - colon-separated list of directories containing library
575 pods. empty by default.
576 --podroot - filesystem base directory from which all relative paths
577 in podpath stem (default is .).
578 --noindex - don't generate an index at the top of the resulting html.
579 --norecurse - don't recurse on those subdirectories listed in podpath.
580 --recurse - recurse on those subdirectories listed in podpath
581 (default behavior).
582 --title - title that will appear in resulting html file.
34db337b 583 --header - produce block header/footer
584 --css - stylesheet URL
54310121 585 --verbose - self-explanatory
34db337b 586 --quiet - supress some benign warning messages
54310121 587
588END_OF_USAGE
589
590sub parse_command_line {
2a28b791 591 my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
34db337b 592 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 593 my $result = GetOptions(
29f227c9 594 'flush' => \$opt_flush,
595 'help' => \$opt_help,
596 'htmldir=s' => \$opt_htmldir,
54310121 597 'htmlroot=s' => \$opt_htmlroot,
29f227c9 598 'index!' => \$opt_index,
54310121 599 'infile=s' => \$opt_infile,
600 'libpods=s' => \$opt_libpods,
601 'netscape!' => \$opt_netscape,
602 'outfile=s' => \$opt_outfile,
603 'podpath=s' => \$opt_podpath,
604 'podroot=s' => \$opt_podroot,
54310121 605 'recurse!' => \$opt_recurse,
606 'title=s' => \$opt_title,
34db337b 607 'header' => \$opt_header,
608 'css=s' => \$opt_css,
54310121 609 'verbose' => \$opt_verbose,
34db337b 610 'quiet' => \$opt_quiet,
54310121 611 );
612 usage("-", "invalid parameters") if not $result;
613
614 usage("-") if defined $opt_help; # see if the user asked for help
615 $opt_help = ""; # just to make -w shut-up.
616
617 $podfile = $opt_infile if defined $opt_infile;
618 $htmlfile = $opt_outfile if defined $opt_outfile;
5a039dd3 619 $htmldir = $opt_htmldir if defined $opt_outfile;
54310121 620
621 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
622 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
623
624 warn "Flushing item and directory caches\n"
625 if $opt_verbose && defined $opt_flush;
626 unlink($dircache, $itemcache) if defined $opt_flush;
627
628 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
629 $podroot = $opt_podroot if defined $opt_podroot;
630
631 $doindex = $opt_index if defined $opt_index;
632 $recurse = $opt_recurse if defined $opt_recurse;
633 $title = $opt_title if defined $opt_title;
34db337b 634 $header = defined $opt_header ? 1 : 0;
635 $css = $opt_css if defined $opt_css;
54310121 636 $verbose = defined $opt_verbose ? 1 : 0;
34db337b 637 $quiet = defined $opt_quiet ? 1 : 0;
54310121 638 $netscape = $opt_netscape if defined $opt_netscape;
639}
640
3e3baf6d 641
642my $saved_cache_key;
643
644sub get_cache {
645 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
646 my @cache_key_args = @_;
647
648 # A first-level cache:
649 # Don't bother reading the cache files if they still apply
650 # and haven't changed since we last read them.
651
652 my $this_cache_key = cache_key(@cache_key_args);
653
654 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
655
656 # load the cache of %pages and %items if possible. $tests will be
657 # non-zero if successful.
658 my $tests = 0;
659 if (-f $dircache && -f $itemcache) {
660 warn "scanning for item cache\n" if $verbose;
661 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
662 }
663
664 # if we didn't succeed in loading the cache then we must (re)build
665 # %pages and %items.
666 if (!$tests) {
667 warn "scanning directories in pod-path\n" if $verbose;
668 scan_podpath($podroot, $recurse, 0);
669 }
670 $saved_cache_key = cache_key(@cache_key_args);
671}
672
673sub cache_key {
674 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
675 return join('!', $dircache, $itemcache, $recurse,
29f227c9 676 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d 677}
678
54310121 679#
3e3baf6d 680# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121 681# are valid caches of %pages and %items. if they are valid then it loads
682# them and returns a non-zero value.
683#
3e3baf6d 684sub load_cache {
54310121 685 my($dircache, $itemcache, $podpath, $podroot) = @_;
686 my($tests);
687 local $_;
688
689 $tests = 0;
690
691 open(CACHE, "<$itemcache") ||
692 die "$0: error opening $itemcache for reading: $!\n";
693 $/ = "\n";
694
695 # is it the same podpath?
696 $_ = <CACHE>;
697 chomp($_);
3e3baf6d 698 $tests++ if (join(":", @$podpath) eq $_);
54310121 699
700 # is it the same podroot?
701 $_ = <CACHE>;
702 chomp($_);
703 $tests++ if ($podroot eq $_);
704
705 # load the cache if its good
706 if ($tests != 2) {
707 close(CACHE);
54310121 708 return 0;
709 }
710
711 warn "loading item cache\n" if $verbose;
712 while (<CACHE>) {
713 /(.*?) (.*)$/;
714 $items{$1} = $2;
715 }
716 close(CACHE);
717
718 warn "scanning for directory cache\n" if $verbose;
719 open(CACHE, "<$dircache") ||
720 die "$0: error opening $dircache for reading: $!\n";
721 $/ = "\n";
722 $tests = 0;
723
724 # is it the same podpath?
725 $_ = <CACHE>;
726 chomp($_);
3e3baf6d 727 $tests++ if (join(":", @$podpath) eq $_);
54310121 728
729 # is it the same podroot?
730 $_ = <CACHE>;
731 chomp($_);
732 $tests++ if ($podroot eq $_);
733
734 # load the cache if its good
735 if ($tests != 2) {
736 close(CACHE);
54310121 737 return 0;
738 }
739
740 warn "loading directory cache\n" if $verbose;
741 while (<CACHE>) {
742 /(.*?) (.*)$/;
743 $pages{$1} = $2;
744 }
745
746 close(CACHE);
747
748 return 1;
749}
750
751#
752# scan_podpath - scans the directories specified in @podpath for directories,
753# .pod files, and .pm files. it also scans the pod files specified in
754# @libpods for =item directives.
755#
756sub scan_podpath {
3e3baf6d 757 my($podroot, $recurse, $append) = @_;
54310121 758 my($pwd, $dir);
759 my($libpod, $dirname, $pod, @files, @poddata);
760
3e3baf6d 761 unless($append) {
762 %items = ();
763 %pages = ();
764 }
765
54310121 766 # scan each directory listed in @podpath
767 $pwd = getcwd();
768 chdir($podroot)
769 || die "$0: error changing to directory $podroot: $!\n";
770 foreach $dir (@podpath) {
771 scan_dir($dir, $recurse);
772 }
773
774 # scan the pods listed in @libpods for =item directives
775 foreach $libpod (@libpods) {
776 # if the page isn't defined then we won't know where to find it
777 # on the system.
778 next unless defined $pages{$libpod} && $pages{$libpod};
779
780 # if there is a directory then use the .pod and .pm files within it.
29f227c9 781 # NOTE: Only finds the first so-named directory in the tree.
782# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
783 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 784 # find all the .pod and .pm files within the directory
785 $dirname = $1;
786 opendir(DIR, $dirname) ||
787 die "$0: error opening directory $dirname: $!\n";
788 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
789 closedir(DIR);
790
791 # scan each .pod and .pm file for =item directives
792 foreach $pod (@files) {
793 open(POD, "<$dirname/$pod") ||
794 die "$0: error opening $dirname/$pod for input: $!\n";
795 @poddata = <POD>;
796 close(POD);
2a28b791 797 clean_data( \@poddata );
54310121 798
2a28b791 799 scan_items( \%items, "$dirname/$pod", @poddata);
54310121 800 }
801
802 # use the names of files as =item directives too.
2a28b791 803### Don't think this should be done this way - confuses issues.(WL)
804### foreach $pod (@files) {
805### $pod =~ /^(.*)(\.pod|\.pm)$/;
806### $items{$1} = "$dirname/$1.html" if $1;
807### }
54310121 808 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
809 $pages{$libpod} =~ /([^:]*\.pm):/) {
810 # scan the .pod or .pm file for =item directives
811 $pod = $1;
812 open(POD, "<$pod") ||
813 die "$0: error opening $pod for input: $!\n";
814 @poddata = <POD>;
815 close(POD);
2a28b791 816 clean_data( \@poddata );
54310121 817
2a28b791 818 scan_items( \%items, "$pod", @poddata);
54310121 819 } else {
820 warn "$0: shouldn't be here (line ".__LINE__."\n";
821 }
822 }
823 @poddata = (); # clean-up a bit
824
825 chdir($pwd)
826 || die "$0: error changing to directory $pwd: $!\n";
827
828 # cache the item list for later use
829 warn "caching items for later use\n" if $verbose;
830 open(CACHE, ">$itemcache") ||
831 die "$0: error open $itemcache for writing: $!\n";
832
833 print CACHE join(":", @podpath) . "\n$podroot\n";
834 foreach my $key (keys %items) {
835 print CACHE "$key $items{$key}\n";
836 }
837
838 close(CACHE);
839
840 # cache the directory list for later use
841 warn "caching directories for later use\n" if $verbose;
842 open(CACHE, ">$dircache") ||
843 die "$0: error open $dircache for writing: $!\n";
844
845 print CACHE join(":", @podpath) . "\n$podroot\n";
846 foreach my $key (keys %pages) {
847 print CACHE "$key $pages{$key}\n";
848 }
849
850 close(CACHE);
851}
852
853#
854# scan_dir - scans the directory specified in $dir for subdirectories, .pod
855# files, and .pm files. notes those that it finds. this information will
856# be used later in order to figure out where the pages specified in L<>
857# links are on the filesystem.
858#
859sub scan_dir {
860 my($dir, $recurse) = @_;
861 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
862 local $_;
863
864 @subdirs = ();
865 @pods = ();
866
867 opendir(DIR, $dir) ||
868 die "$0: error opening directory $dir: $!\n";
869 while (defined($_ = readdir(DIR))) {
870 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
871 $pages{$_} = "" unless defined $pages{$_};
872 $pages{$_} .= "$dir/$_:";
873 push(@subdirs, $_);
874 } elsif (/\.pod$/) { # .pod
875 s/\.pod$//;
876 $pages{$_} = "" unless defined $pages{$_};
877 $pages{$_} .= "$dir/$_.pod:";
878 push(@pods, "$dir/$_.pod");
879 } elsif (/\.pm$/) { # .pm
880 s/\.pm$//;
881 $pages{$_} = "" unless defined $pages{$_};
882 $pages{$_} .= "$dir/$_.pm:";
883 push(@pods, "$dir/$_.pm");
884 }
885 }
886 closedir(DIR);
887
888 # recurse on the subdirectories if necessary
889 if ($recurse) {
890 foreach my $subdir (@subdirs) {
891 scan_dir("$dir/$subdir", $recurse);
892 }
893 }
894}
895
896#
897# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
898# build an index.
899#
900sub scan_headings {
901 my($sections, @data) = @_;
2a28b791 902 my($tag, $which_head, $otitle, $listdepth, $index);
54310121 903
be173d55 904 # here we need local $ignore = 0;
905 # unfortunately, we can't have it, because $ignore is lexical
906 $ignore = 0;
907
54310121 908 $listdepth = 0;
909 $index = "";
910
911 # scan for =head directives, note their name, and build an index
912 # pointing to each of them.
913 foreach my $line (@data) {
bb9460ed 914 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
2a28b791 915 ($tag, $which_head, $otitle) = ($1,$2,$3);
916
917 my $title = depod( $otitle );
918 my $name = htmlify( $title );
919 $$sections{$name} = 1;
920 $title = process_text( \$otitle );
54310121 921
102c538a 922 while ($which_head != $listdepth) {
923 if ($which_head > $listdepth) {
924 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
925 $listdepth++;
926 } elsif ($which_head < $listdepth) {
927 $listdepth--;
928 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
929 }
54310121 930 }
54310121 931
932 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
2a28b791 933 "<A HREF=\"#" . $name . "\">" .
934 $title . "</A></LI>";
54310121 935 }
936 }
937
938 # finish off the lists
939 while ($listdepth--) {
940 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
941 }
942
943 # get rid of bogus lists
944 $index =~ s,\t*<UL>\s*</UL>\n,,g;
945
bb9460ed 946 $ignore = 1; # restore old value;
be173d55 947
54310121 948 return $index;
949}
950
951#
952# scan_items - scans the pod specified by $pod for =item directives. we
953# will use this information later on in resolving C<> links.
954#
955sub scan_items {
2a28b791 956 my( $itemref, $pod, @poddata ) = @_;
54310121 957 my($i, $item);
958 local $_;
959
960 $pod =~ s/\.pod$//;
961 $pod .= ".html" if $pod;
962
963 foreach $i (0..$#poddata) {
2a28b791 964 my $txt = depod( $poddata[$i] );
965
966 # figure out what kind of item it is.
967 # Build string for referencing this item.
968 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
969 next unless $1;
970 $item = $1;
971 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
972 $item = $1;
973 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
974 $item = $1;
975 } else {
976 next;
54310121 977 }
2a28b791 978 my $fid = fragment_id( $item );
979 $$itemref{$fid} = "$pod" if $fid;
54310121 980 }
981}
982
983#
984# process_head - convert a pod head[1-6] tag and convert it to HTML format.
985#
986sub process_head {
2a28b791 987 my($tag, $heading, $hasindex) = @_;
54310121 988
989 # figure out the level of the =head
990 $tag =~ /head([1-6])/;
991 my $level = $1;
992
2a28b791 993 if( $listlevel ){
994 warn "$0: $podfile: unterminated list at =head in paragraph $paragraph. ignoring.\n";
995 while( $listlevel ){
996 process_back();
997 }
998 }
999
1000 print HTML "<P>\n";
1001 if( $level == 1 && ! $top ){
1002 print HTML "<A HREF=\"#__index__\"><SMALL>page index</SMALL></A>\n"
1003 if $hasindex;
1004 print HTML "<HR>\n"
1005 }
1006
1007 my $name = htmlify( depod( $heading ) );
1008 my $convert = process_text( \$heading );
1009 print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
54310121 1010}
1011
2a28b791 1012
54310121 1013#
2a28b791 1014# emit_item_tag - print an =item's text
1015# Note: The global $EmittedItem is used for inhibiting self-references.
54310121 1016#
2a28b791 1017my $EmittedItem;
1018
1019sub emit_item_tag($$$){
1020 my( $otext, $text, $compact ) = @_;
1021 my $item = fragment_id( $text );
54310121 1022
2a28b791 1023 $EmittedItem = $item;
1024 ### print STDERR "emit_item_tag=$item ($text)\n";
54310121 1025
2a28b791 1026 print HTML '<STRONG>';
1027 if ($items_named{$item}++) {
1028 print HTML process_text( \$otext );
1029 } else {
1030 my $name = 'item_' . $item;
1031 print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';
1032 }
1033 print HTML "</STRONG><BR>\n";
1034 undef( $EmittedItem );
1035}
1036
1037sub emit_li {
1038 my( $tag ) = @_;
1039 if( $items_seen[$listlevel]++ == 0 ){
1040 push( @listend, "</$tag>" );
1041 print HTML "<$tag>\n";
1042 }
1043 print HTML $tag eq 'DL' ? '<DT>' : '<LI>';
1044}
1045
1046#
1047# process_item - convert a pod item tag and convert it to HTML format.
1048#
1049sub process_item {
1050 my( $otext ) = @_;
54310121 1051
1052 # lots of documents start a list without doing an =over. this is
1053 # bad! but, the proper thing to do seems to be to just assume
1054 # they did do an =over. so warn them once and then continue.
2a28b791 1055 if( $listlevel == 0 ){
1056 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n";
1057 process_over();
1058 }
54310121 1059
2a28b791 1060 # formatting: insert a paragraph if preceding item has >1 paragraph
1061 if( $after_lpar ){
1062 print HTML "<P></P>\n";
1063 $after_lpar = 0;
1064 }
54310121 1065
1066 # remove formatting instructions from the text
2a28b791 1067 my $text = depod( $otext );
1068
1069 # all the list variants:
1070 if( $text =~ /\A\*/ ){ # bullet
1071 emit_li( 'UL' );
1072 if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1073 my $tag = $1;
1074 $otext =~ s/\A\*\s+//;
1075 emit_item_tag( $otext, $tag, 1 );
54310121 1076 }
1077
2a28b791 1078 } elsif( $text =~ /\A\d+/ ){ # numbered list
1079 emit_li( 'OL' );
1080 if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1081 my $tag = $1;
1082 $otext =~ s/\A\d+\.?\s*//;
1083 emit_item_tag( $otext, $tag, 1 );
7b8d334a 1084 }
54310121 1085
2a28b791 1086 } else { # definition list
1087 emit_li( 'DL' );
1088 if ($text =~ /\A(.+)\Z/s ){ # should have text
1089 emit_item_tag( $otext, $text, 1 );
7b8d334a 1090 }
54310121 1091 print HTML '<DD>';
1092 }
54310121 1093 print HTML "\n";
1094}
1095
1096#
2a28b791 1097# process_over - process a pod over tag and start a corresponding HTML list.
54310121 1098#
1099sub process_over {
1100 # start a new list
1101 $listlevel++;
2a28b791 1102 push( @items_seen, 0 );
1103 $after_lpar = 0;
54310121 1104}
1105
1106#
1107# process_back - process a pod back tag and convert it to HTML format.
1108#
1109sub process_back {
2a28b791 1110 if( $listlevel == 0 ){
1111 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n";
1112 return;
1113 }
54310121 1114
1115 # close off the list. note, I check to see if $listend[$listlevel] is
1116 # defined because an =item directive may have never appeared and thus
1117 # $listend[$listlevel] may have never been initialized.
1118 $listlevel--;
2a28b791 1119 if( defined $listend[$listlevel] ){
1120 print HTML '<P></P>' if $after_lpar;
1121 print HTML $listend[$listlevel];
1122 print HTML "\n";
1123 pop( @listend );
1124 }
1125 $after_lpar = 0;
54310121 1126
2a28b791 1127 # clean up item count
1128 pop( @items_seen );
54310121 1129}
1130
1131#
2a28b791 1132# process_cut - process a pod cut tag, thus start ignoring pod directives.
54310121 1133#
1134sub process_cut {
1135 $ignore = 1;
1136}
1137
1138#
2a28b791 1139# process_pod - process a pod pod tag, thus stop ignoring pod directives
1140# until we see a corresponding cut.
54310121 1141#
1142sub process_pod {
1143 # no need to set $ignore to 0 cause the main loop did it
1144}
1145
1146#
2a28b791 1147# process_for - process a =for pod tag. if it's for html, spit
c4d9b39d 1148# it out verbatim, if illustration, center it, otherwise ignore it.
54310121 1149#
1150sub process_for {
1151 my($whom, $text) = @_;
1152 if ( $whom =~ /^(pod2)?html$/i) {
1153 print HTML $text;
c4d9b39d 1154 } elsif ($whom =~ /^illustration$/i) {
1155 1 while chomp $text;
1156 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1157 $text .= $ext, last if -r "$text$ext";
1158 }
1159 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1160 }
54310121 1161}
1162
1163#
1164# process_begin - process a =begin pod tag. this pushes
1165# whom we're beginning on the begin stack. if there's a
1166# begin stack, we only print if it us.
1167#
1168sub process_begin {
1169 my($whom, $text) = @_;
1170 $whom = lc($whom);
1171 push (@begin_stack, $whom);
1172 if ( $whom =~ /^(pod2)?html$/) {
1173 print HTML $text if $text;
1174 }
1175}
1176
1177#
1178# process_end - process a =end pod tag. pop the
1179# begin stack. die if we're mismatched.
1180#
1181sub process_end {
1182 my($whom, $text) = @_;
1183 $whom = lc($whom);
1184 if ($begin_stack[-1] ne $whom ) {
1185 die "Unmatched begin/end at chunk $paragraph\n"
1186 }
2a28b791 1187 pop( @begin_stack );
54310121 1188}
1189
1190#
2a28b791 1191# process_pre - indented paragraph, made into <PRE></PRE>
54310121 1192#
2a28b791 1193sub process_pre {
1194 my( $text ) = @_;
1195 my( $rest );
54310121 1196 return if $ignore;
1197
54310121 1198 $rest = $$text;
1199
2a28b791 1200 # insert spaces in place of tabs
1201 $rest =~ s#.*#
be173d55 1202 my $line = $&;
1203 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1204 $line;
1205 #eg;
54310121 1206
2a28b791 1207 # convert some special chars to HTML escapes
1208 $rest =~ s/&/&amp;/g;
1209 $rest =~ s/</&lt;/g;
1210 $rest =~ s/>/&gt;/g;
1211 $rest =~ s/"/&quot;/g;
1212
1213 # try and create links for all occurrences of perl.* within
1214 # the preformatted text.
1215 $rest =~ s{
1216 (\s*)(perl\w+)
1217 }{
1218 if ( defined $pages{$2} ){ # is a link
1219 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1220 } elsif (defined $pages{dosify($2)}) { # is a link
1221 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1222 } else {
1223 "$1$2";
1224 }
1225 }xeg;
1226 $rest =~ s{
1227 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1228 }{
1229 my $url ;
1230 if ( $htmlfileurl ne '' ){
1231 # Here, we take advantage of the knowledge
1232 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1233 # Since $htmlroot eq '', we need to prepend $htmldir
1234 # on the fron of the link to get the absolute path
1235 # of the link's target. We check for a leading '/'
1236 # to avoid corrupting links that are #, file:, etc.
1237 my $old_url = $3 ;
1238 $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1239 $url = relativize_url( "$old_url.html", $htmlfileurl );
1240 } else {
1241 $url = "$3.html" ;
1242 }
1243 "$1$url" ;
1244 }xeg;
1245
1246 # Look for embedded URLs and make them into links. We don't
1247 # relativize them since they are best left as the author intended.
1248
1249 my $urls = '(' . join ('|', qw{
54310121 1250 http
1251 telnet
1252 mailto
1253 news
1254 gopher
1255 file
1256 wais
1257 ftp
1258 } )
1259 . ')';
1260
2a28b791 1261 my $ltrs = '\w';
1262 my $gunk = '/#~:.?+=&%@!\-';
1263 my $punc = '.:?\-';
1264 my $any = "${ltrs}${gunk}${punc}";
54310121 1265
2a28b791 1266 $rest =~ s{
54310121 1267 \b # start at word boundary
1268 ( # begin $1 {
29f227c9 1269 $urls : # need resource and a colon
1270 (?!:) # Ignore File::, among others.
54310121 1271 [$any] +? # followed by on or more
1272 # of any valid character, but
1273 # be conservative and take only
1274 # what you need to....
1275 ) # end $1 }
1276 (?= # look-ahead non-consumptive assertion
1277 [$punc]* # either 0 or more puntuation
1278 [^$any] # followed by a non-url char
1279 | # or else
1280 $ # then end of the string
1281 )
1282 }{<A HREF="$1">$1</A>}igox;
1283
2a28b791 1284 # text should be as it is (verbatim)
1285 $$text = $rest;
1286}
54310121 1287
54310121 1288
2a28b791 1289#
1290# pure text processing
1291#
1292# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1293# we don't want this to happen within IS
1294#
1295sub pure_text($){
1296 my $text = shift();
1297 process_puretext( $text, \$ptQuote, 1 );
54310121 1298}
1299
2a28b791 1300sub inIS_text($){
1301 my $text = shift();
1302 process_puretext( $text, \$ptQuote, 0 );
1303}
54310121 1304
1305#
1306# process_puretext - process pure text (without pod-escapes) converting
1307# double-quotes and handling implicit C<> links.
1308#
1309sub process_puretext {
2a28b791 1310 my($text, $quote, $notinIS) = @_;
54310121 1311
2a28b791 1312 ## Guessing at func() or [$@%&]*var references in plain text is destined
1313 ## to produce some strange looking ref's. uncomment to disable:
1314 ## $notinIS = 0;
1315
1316 my(@words, $lead, $trail);
54310121 1317
2a28b791 1318 # convert double-quotes to single-quotes
1319 if( $$quote && $text =~ s/"/''/s ){
1320 $$quote = 0;
1321 }
1322 while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1323 $$quote = 1 if $text =~ s/"/``/s;
54310121 1324
1325 # keep track of leading and trailing white-space
2a28b791 1326 $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
1327 $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
54310121 1328
2a28b791 1329 # split at space/non-space boundaries
1330 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
54310121 1331
1332 # process each word individually
1333 foreach my $word (@words) {
2a28b791 1334 # skip space runs
1335 next if $word =~ /^\s*$/;
54310121 1336 # see if we can infer a link
2a28b791 1337 if( $notinIS && $word =~ s/^(\w+)\((.*)\)\W*$/$1/ ) {
54310121 1338 # has parenthesis so should have been a C<> ref
2a28b791 1339 ## try for a pagename (perlXXX(1))?
1340 if( $2 =~ /^\d+$/ ){
1341 my $url = page_sect( $word, '' );
1342 if( defined $url ){
1343 $word = "<A HREF=\"$url\">the $word manpage</A>";
1344 next;
1345 }
1346 }
1347 $word = emit_C( $word );
1348
1349#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1350## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1351## # perl variables, should be a C<> ref
1352## $word = emit_C( $word );
1353
54310121 1354 } elsif ($word =~ m,^\w+://\w,) {
1355 # looks like a URL
5a039dd3 1356 # Don't relativize it: leave it as the author intended
54310121 1357 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1358 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1359 # looks like an e-mail address
7b8d334a 1360 my ($w1, $w2, $w3) = ("", $word, "");
1361 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1362 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1363 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1364 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1365 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1366 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1367 } else {
7b8d334a 1368 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1369 }
1370 }
1371
2a28b791 1372 # put everything back together
1373 return $lead . join( '', @words ) . $trail;
1374}
1375
54310121 1376
2a28b791 1377#
1378# process_text - handles plaintext that appears in the input pod file.
1379# there may be pod commands embedded within the text so those must be
1380# converted to html commands.
1381#
7ba65c74 1382
1383sub process_text1($$;$);
1384
2a28b791 1385sub process_text {
1386 return if $ignore;
1387 my( $tref ) = @_;
1388 my $res = process_text1( 0, $tref );
1389 $$tref = $res;
1390}
1391
1392sub process_text1($$;$){
1393 my( $lev, $rstr, $func ) = @_;
1394 $lev++ unless defined $func;
1395 my $res = '';
1396
1397 if( $func eq 'B' ){
1398 # B<text> - boldface
1399 $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
1400
1401 } elsif( $func eq 'C' ){
1402 # C<code> - can be a ref or <CODE></CODE>
1403 # need to extract text
1404 my $par = go_ahead( $rstr, 'C' );
1405
1406 ## clean-up of the link target
1407 my $text = depod( $par );
1408
1409 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1410 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1411
1412 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1413
1414 } elsif( $func eq 'E' ){
1415 # E<x> - convert to character
1416 $$rstr =~ s/^(\w+)>//;
1417 $res = "&$1;";
1418
1419 } elsif( $func eq 'F' ){
1420 # F<filename> - italizice
1421 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1422
1423 } elsif( $func eq 'I' ){
1424 # I<text> - italizice
1425 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1426
1427 } elsif( $func eq 'L' ){
1428 # L<link> - link
1429 ## L<text|cross-ref> => produce text, use cross-ref for linking
1430 ## L<cross-ref> => make text from cross-ref
1431 ## need to extract text
1432 my $par = go_ahead( $rstr, 'L' );
1433
1434 # some L<>'s that shouldn't be:
1435 # a) full-blown URL's are emitted as-is
1436 if( $par =~ m{^\w+://}s ){
1437 return make_URL_href( $par );
1438 }
1439 # b) C<...> is stripped and treated as C<>
1440 if( $par =~ /^C<(.*)>$/ ){
1441 my $text = depod( $1 );
1442 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1443 }
1444
1445 # analyze the contents
1446 $par =~ s/\n/ /g; # undo word-wrapped tags
1447 my $opar = $par;
1448 my $linktext;
1449 if( $par =~ s{^([^|]+)\|}{} ){
1450 $linktext = $1;
1451 }
1452
1453 # make sure sections start with a /
1454 $par =~ s{^"}{/"};
1455
1456 my( $page, $section, $ident );
1457
1458 # check for link patterns
1459 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
1460 # we've got a name/ident (no quotes)
1461 ( $page, $ident ) = ( $1, $2 );
1462 ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1463
1464 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1465 # even though this should be a "section", we go for ident first
1466 ( $page, $ident ) = ( $1, $2 );
1467 ### print STDERR "--> L<$par> to page $page, section $section\n";
1468
1469 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
1470 ( $page, $section ) = ( '', $par );
1471 ### print STDERR "--> L<$par> to void page, section $section\n";
1472
1473 } else {
1474 ( $page, $section ) = ( $par, '' );
1475 ### print STDERR "--> L<$par> to page $par, void section\n";
1476 }
1477
1478 # now, either $section or $ident is defined. the convoluted logic
1479 # below tries to resolve L<> according to what the user specified.
1480 # failing this, we try to find the next best thing...
1481 my( $url, $ltext, $fid );
1482
1483 RESOLVE: {
1484 if( defined $ident ){
1485 ## try to resolve $ident as an item
1486 ( $url, $fid ) = coderef( $page, $ident );
1487 if( $url ){
1488 if( ! defined( $linktext ) ){
1489 $linktext = $ident;
1490 $linktext .= " in " if $ident && $page;
1491 $linktext .= "the $page manpage" if $page;
1492 }
1493 ### print STDERR "got coderef url=$url\n";
1494 last RESOLVE;
1495 }
1496 ## no luck: go for a section (auto-quoting!)
1497 $section = $ident;
1498 }
1499 ## now go for a section
1500 my $htmlsection = htmlify( $section );
1501 $url = page_sect( $page, $htmlsection );
1502 if( $url ){
1503 if( ! defined( $linktext ) ){
1504 $linktext = $section;
1505 $linktext .= " in " if $section && $page;
1506 $linktext .= "the $page manpage" if $page;
1507 }
1508 ### print STDERR "got page/section url=$url\n";
1509 last RESOLVE;
1510 }
1511 ## no luck: go for an ident
1512 if( $section ){
1513 $ident = $section;
1514 } else {
1515 $ident = $page;
1516 $page = undef();
1517 }
1518 ( $url, $fid ) = coderef( $page, $ident );
1519 if( $url ){
1520 if( ! defined( $linktext ) ){
1521 $linktext = $ident;
1522 $linktext .= " in " if $ident && $page;
1523 $linktext .= "the $page manpage" if $page;
1524 }
1525 ### print STDERR "got section=>coderef url=$url\n";
1526 last RESOLVE;
1527 }
1528
1529 # warning; show some text.
1530 $linktext = $opar unless defined $linktext;
1531 warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1532 }
1533
1534 # now we have an URL or just plain code
1535 $$rstr = $linktext . '>' . $$rstr;
1536 if( defined( $url ) ){
1537 $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
1538 } else {
1539 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1540 }
1541
1542 } elsif( $func eq 'S' ){
1543 # S<text> - non-breaking spaces
1544 $res = process_text1( $lev, $rstr );
1545 $res =~ s/ /&nbsp;/g;
1546
1547 } elsif( $func eq 'X' ){
1548 # X<> - ignore
1549 $$rstr =~ s/^[^>]*>//;
1550
1551 } elsif( $func eq 'Z' ){
1552 # Z<> - empty
1553 warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1554 unless $$rstr =~ s/^>//;
1555
1556 } else {
1557 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
1558 # all others: either recurse into new function or
1559 # terminate at closing angle bracket
1560 my $pt = $1;
1561 $pt .= '>' if $2 eq '>' && $lev == 1;
1562 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1563 return $res if $2 eq '>' && $lev > 1;
1564 if( $2 ne '>' ){
1565 $res .= process_text1( $lev, $rstr, substr($2,0,1) );
1566 }
1567
1568 }
1569 if( $lev == 1 ){
1570 $res .= pure_text( $$rstr );
54310121 1571 } else {
2a28b791 1572 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
54310121 1573 }
1574 }
2a28b791 1575 return $res;
1576}
54310121 1577
2a28b791 1578#
1579# go_ahead: extract text of an IS (can be nested)
1580#
1581sub go_ahead($$){
1582 my( $rstr, $func ) = @_;
1583 my $res = '';
1584 my $level = 1;
1585 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
1586 $res .= $1;
1587 if( $2 eq '>' ){
1588 return $res if --$level == 0;
1589 } else {
1590 ++$level;
1591 }
1592 $res .= $2;
1593 }
1594 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1595 return $res;
54310121 1596}
1597
1598#
2a28b791 1599# emit_C - output result of C<text>
1600# $text is the depod-ed text
54310121 1601#
2a28b791 1602sub emit_C($;$){
1603 my( $text, $nocode ) = @_;
1604 my $res;
1605 my( $url, $fid ) = coderef( undef(), $text );
1606
1607 # need HTML-safe text
1608 my $linktext = html_escape( $text );
1609
1610 if( defined( $url ) &&
1611 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1612 $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>";
1613 } elsif( 0 && $nocode ){
1614 $res = $linktext;
1615 } else {
1616 $res = "<CODE>$linktext</CODE>";
1617 }
1618 return $res;
54310121 1619}
1620
1621#
2a28b791 1622# html_escape: make text safe for HTML
1623#
1624sub html_escape {
1625 my $rest = $_[0];
1626 $rest =~ s/&/&amp;/g;
1627 $rest =~ s/</&lt;/g;
1628 $rest =~ s/>/&gt;/g;
1629 $rest =~ s/"/&quot;/g;
1630 return $rest;
1631}
1632
1633
1634#
39e571d4 1635# dosify - convert filenames to 8.3
1636#
1637sub dosify {
1638 my($str) = @_;
fe4c6be1 1639 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4 1640 if ($Is83) {
1641 $str = lc $str;
1642 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1643 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1644 }
1645 return $str;
1646}
1647
1648#
2a28b791 1649# page_sect - make an URL from the text of a L<>
54310121 1650#
2a28b791 1651sub page_sect($$) {
1652 my( $page, $section ) = @_;
1653 my( $linktext, $page83, $link); # work strings
1654
1655 # check if we know that this is a section in this page
1656 if (!defined $pages{$page} && defined $sections{$page}) {
1657 $section = $page;
1658 $page = "";
1659 ### print STDERR "reset page='', section=$section\n";
54310121 1660 }
1661
39e571d4 1662 $page83=dosify($page);
1663 $page=$page83 if (defined $pages{$page83});
54310121 1664 if ($page eq "") {
2a28b791 1665 $link = "#" . htmlify( $section );
350ccacd 1666 } elsif ( $page =~ /::/ ) {
350ccacd 1667 $page =~ s,::,/,g;
29f227c9 1668 # Search page cache for an entry keyed under the html page name,
1669 # then look to see what directory that page might be in. NOTE:
1670 # this will only find one page. A better solution might be to produce
1671 # an intermediate page that is an index to all such pages.
1672 my $page_name = $page ;
1673 $page_name =~ s,^.*/,, ;
1674 if ( defined( $pages{ $page_name } ) &&
1675 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1676 ) {
1677 $page = $1 ;
1678 }
1679 else {
1680 # NOTE: This branch assumes that all A::B pages are located in
1681 # $htmlroot/A/B.html . This is often incorrect, since they are
1682 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1683 # analyze the contents of %pages and figure out where any
1684 # cousins of A::B are, then assume that. So, if A::B isn't found,
1685 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1686 # lib/A/B.pm. This is also limited, but it's an improvement.
1687 # Maybe a hints file so that the links point to the correct places
2a28b791 1688 # nonetheless?
1689
29f227c9 1690 }
350ccacd 1691 $link = "$htmlroot/$page.html";
2a28b791 1692 $link .= "#" . htmlify( $section ) if ($section);
54310121 1693 } elsif (!defined $pages{$page}) {
54310121 1694 $link = "";
54310121 1695 } else {
2a28b791 1696 $section = htmlify( $section ) if $section ne "";
1697 ### print STDERR "...section=$section\n";
54310121 1698
1699 # if there is a directory by the name of the page, then assume that an
1700 # appropriate section will exist in the subdirectory
29f227c9 1701# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1702 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 1703 $link = "$htmlroot/$1/$section.html";
2a28b791 1704 ### print STDERR "...link=$link\n";
54310121 1705
1706 # since there is no directory by the name of the page, the section will
1707 # have to exist within a .html of the same name. thus, make sure there
1708 # is a .pod or .pm that might become that .html
1709 } else {
2a28b791 1710 $section = "#$section" if $section;
1711 ### print STDERR "...section=$section\n";
1712
54310121 1713 # check if there is a .pod with the page name
1714 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1715 $link = "$htmlroot/$1.html$section";
1716 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1717 $link = "$htmlroot/$1.html$section";
1718 } else {
54310121 1719 $link = "";
54310121 1720 }
1721 }
1722 }
1723
54310121 1724 if ($link) {
29f227c9 1725 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1726 # implies $htmlroot eq ''. This means that the link in question
1727 # needs a prefix of $htmldir if it begins with '/'. The test for
1728 # the initial '/' is done to avoid '#'-only links, and to allow
1729 # for other kinds of links, like file:, ftp:, etc.
1730 my $url ;
1731 if ( $htmlfileurl ne '' ) {
2a28b791 1732 $link = "$htmldir$link" if $link =~ m{^/};
1733 $url = relativize_url( $link, $htmlfileurl );
1734# print( " b: [$link,$htmlfileurl,$url]\n" );
29f227c9 1735 }
1736 else {
1737 $url = $link ;
1738 }
2a28b791 1739 return $url;
29f227c9 1740
54310121 1741 } else {
2a28b791 1742 return undef();
54310121 1743 }
54310121 1744}
1745
1746#
29f227c9 1747# relativize_url - convert an absolute URL to one relative to a base URL.
1748# Assumes both end in a filename.
1749#
1750sub relativize_url {
1751 my ($dest,$source) = @_ ;
1752
1753 my ($dest_volume,$dest_directory,$dest_file) =
1754 File::Spec::Unix->splitpath( $dest ) ;
1755 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1756
1757 my ($source_volume,$source_directory,$source_file) =
1758 File::Spec::Unix->splitpath( $source ) ;
1759 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1760
1761 my $rel_path = '' ;
1762 if ( $dest ne '' ) {
1763 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1764 }
1765
1766 if ( $rel_path ne '' &&
1767 substr( $rel_path, -1 ) ne '/' &&
1768 substr( $dest_file, 0, 1 ) ne '#'
1769 ) {
1770 $rel_path .= "/$dest_file" ;
1771 }
1772 else {
1773 $rel_path .= "$dest_file" ;
1774 }
1775
1776 return $rel_path ;
1777}
1778
54310121 1779
1780#
2a28b791 1781# coderef - make URL from the text of a C<>
54310121 1782#
2a28b791 1783sub coderef($$){
1784 my( $page, $item ) = @_;
1785 my( $url );
1786
1787 my $fid = fragment_id( $item );
1788
1789 if( defined( $page ) ){
1790 # we have been given a $page...
1791 $page =~ s{::}{/}g;
1792
1793 # Do we take it? Item could be a section!
1794 my $base = $items{$fid};
1795 $base =~ s{[^/]*/}{};
1796 if( $base ne "$page.html" ){
1797 ### print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1798 $page = undef();
1799 }
54310121 1800
2a28b791 1801 } else {
1802 # no page - local items precede cached items
1803 if( exists $local_items{$fid} ){
1804 $page = $local_items{$fid};
1805 } else {
1806 $page = $items{$fid};
1807 }
1808 }
54310121 1809
1810 # if there was a pod file that we found earlier with an appropriate
1811 # =item directive, then create a link to that page.
2a28b791 1812 if( defined $page ){
1813 if( $page ){
1814 if( $pages{$page} =~ /([^:.]*)\.[^:]*:/){
1815 $page = $1 . '.html';
29f227c9 1816 }
2a28b791 1817 my $link = "$htmlroot/$page#item_$fid";
54310121 1818
2a28b791 1819 # Here, we take advantage of the knowledge that $htmlfileurl
1820 # ne '' implies $htmlroot eq ''.
1821 if ( $htmlfileurl ne '' ) {
1822 $link = "$htmldir$link" ;
1823 $url = relativize_url( $link, $htmlfileurl ) ;
1824 } else {
1825 $url = $link ;
1826 }
1827 } else {
1828 $url = "#item_" . $fid;
1829 }
54310121 1830
2a28b791 1831 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1832 }
1833 return( $url, $fid );
54310121 1834}
1835
54310121 1836
1837
1838#
29f227c9 1839# Adapted from Nick Ing-Simmons' PodToHtml package.
1840sub relative_url {
1841 my $source_file = shift ;
1842 my $destination_file = shift;
1843
1844 my $source = URI::file->new_abs($source_file);
1845 my $uo = URI::file->new($destination_file,$source)->abs;
1846 return $uo->rel->as_string;
1847}
1848
1849
1850#
54310121 1851# finish_list - finish off any pending HTML lists. this should be called
1852# after the entire pod file has been read and converted.
1853#
1854sub finish_list {
7b8d334a 1855 while ($listlevel > 0) {
54310121 1856 print HTML "</DL>\n";
1857 $listlevel--;
1858 }
1859}
1860
1861#
1862# htmlify - converts a pod section specification to a suitable section
2a28b791 1863# specification for HTML. Note that we keep spaces and special characters
1864# except ", ? (Netscape problem) and the hyphen (writer's problem...).
54310121 1865#
1866sub htmlify {
2a28b791 1867 my( $heading) = @_;
1868 $heading =~ s/(\s+)/ /g;
1869 $heading =~ s/\s+\Z//;
1870 $heading =~ s/\A\s+//;
1871 # The hyphen is a disgrace to the English language.
1872 $heading =~ s/[-"?]//g;
1873 $heading = lc( $heading );
1874 return $heading;
1875}
54310121 1876
2a28b791 1877#
1878# depod - convert text by eliminating all interior sequences
1879# Note: can be called with copy or modify semantics
1880#
1881my %E2c;
1882$E2c{lt} = '<';
1883$E2c{gt} = '>';
1884$E2c{sol} = '/';
1885$E2c{verbar} = '|';
1886
7ba65c74 1887sub depod1($;$);
1888
2a28b791 1889sub depod($){
1890 my $string;
1891 if( ref( $_[0] ) ){
1892 $string = ${$_[0]};
1893 ${$_[0]} = depod1( \$string );
1894 } else {
1895 $string = $_[0];
1896 depod1( \$string );
1897 }
1898}
54310121 1899
2a28b791 1900sub depod1($;$){
1901 my( $rstr, $func ) = @_;
1902 my $res = '';
1903 if( ! defined( $func ) ){
1904 # skip to next begin of an interior sequence
1905 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<// ){
1906 # recurse into its text
1907 $res .= $1 . depod1( $rstr, $2 );
1908 }
1909 $res .= $$rstr;
1910 } elsif( $func eq 'E' ){
1911 # E<x> - convert to character
1912 $$rstr =~ s/^(\w+)>//;
1913 $res .= $E2c{$1};
1914 } elsif( $func eq 'X' ){
1915 # X<> - ignore
1916 $$rstr =~ s/^[^>]*>//;
1917 } elsif( $func eq 'Z' ){
1918 # Z<> - empty
1919 $$rstr =~ s/^>//;
1920 } else {
1921 # all others: either recurse into new function or
1922 # terminate at closing angle bracket
1923 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)// ){
1924 $res .= $1;
1925 last if $2 eq '>';
1926 $res .= depod1( $rstr, substr($2,0,1) );
1927 }
1928 ## If we're here and $2 ne '>': undelimited interior sequence.
1929 ## Ignored, as this is called without proper indication of where we are.
1930 ## Rely on process_text to produce diagnostics.
1931 }
1932 return $res;
1933}
54310121 1934
2a28b791 1935#
1936# fragment_id - construct a fragment identifier from:
1937# a) =item text
1938# b) contents of C<...>
1939#
1940my @hc;
1941sub fragment_id {
1942 my $text = shift();
1943 $text =~ s/\s+\Z//s;
1944 if( $text ){
1945 # a method or function?
1946 return $1 if $text =~ /(\w+)\s*\(/;
1947 return $1 if $text =~ /->\s*(\w+)\s*\(?/;
1948
1949 # a variable name?
1950 return $1 if $text =~ /^([$@%*]\S+)/;
1951
1952 # some pattern matching operator?
1953 return $1 if $text =~ m|^(\w+/).*/\w*$|;
1954
1955 # fancy stuff... like "do { }"
1956 return $1 if $text =~ m|^(\w+)\s*{.*}$|;
1957
1958 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
1959 # and some funnies with ... Module ...
1960 return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
1961 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
1962
1963 # text? normalize!
1964 $text =~ s/\s+/_/sg;
1965 $text =~ s{(\W)}{
1966 defined( $hc[ord($1)] ) ? $hc[ord($1)]
1967 : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
1968 $text = substr( $text, 0, 50 );
1969 } else {
1970 return undef();
1971 }
54310121 1972}
1973
2a28b791 1974#
1975# make_URL_href - generate HTML href from URL
1976# Special treatment for CGI queries.
1977#
1978sub make_URL_href($){
1979 my( $url ) = @_;
1980 if( $url !~
1981 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)?$}{<A HREF="$1$2">$1</A>}i ){
1982 $url = "<A HREF=\"$url\">$url</A>";
1983 }
1984 return $url;
54310121 1985}
1986
19871;