IO::Socket now sets $!, avoids eval/die (patch from Graham Barr
[p5sagit/p5-mst-13.2.git] / lib / Pod / Html.pm
CommitLineData
54310121 1package Pod::Html;
2
3use Pod::Functions;
4use Getopt::Long; # package for handling command-line parameters
29f227c9 5use File::Spec::Unix;
54310121 6require Exporter;
7b8d334a 7use vars qw($VERSION);
2a28b791 8$VERSION = 1.03;
54310121 9@ISA = Exporter;
10@EXPORT = qw(pod2html htmlify);
11use Cwd;
12
13use Carp;
14
3ec07288 15use locale; # make \w work right in non-ASCII lands
16
54310121 17use strict;
18
7b8d334a 19use Config;
20
54310121 21=head1 NAME
22
7b8d334a 23Pod::Html - module to convert pod files to HTML
54310121 24
25=head1 SYNOPSIS
26
7b8d334a 27 use Pod::Html;
54310121 28 pod2html([options]);
29
30=head1 DESCRIPTION
31
32Converts files from pod format (see L<perlpod>) to HTML format. It
33can automatically generate indexes and cross-references, and it keeps
34a cache of things it knows how to cross-reference.
35
36=head1 ARGUMENTS
37
38Pod::Html takes the following arguments:
39
40=over 4
41
42=item help
43
44 --help
45
46Displays the usage message.
47
5a039dd3 48=item htmldir
49
50 --htmldir=name
51
52Sets the directory in which the resulting HTML file is placed. This
29f227c9 53is used to generate relative links to other files. Not passing this
54causes all links to be absolute, since this is the value that tells
55Pod::Html the root of the documentation tree.
5a039dd3 56
54310121 57=item htmlroot
58
59 --htmlroot=name
60
61Sets the base URL for the HTML files. When cross-references are made,
62the HTML root is prepended to the URL.
63
64=item infile
65
66 --infile=name
67
68Specify the pod file to convert. Input is taken from STDIN if no
69infile is specified.
70
71=item outfile
72
73 --outfile=name
74
75Specify the HTML file to create. Output goes to STDOUT if no outfile
76is specified.
77
78=item podroot
79
80 --podroot=name
81
82Specify the base directory for finding library pods.
83
84=item podpath
85
86 --podpath=name:...:name
87
88Specify which subdirectories of the podroot contain pod files whose
89HTML converted forms can be linked-to in cross-references.
90
91=item libpods
92
93 --libpods=name:...:name
94
95List of page names (eg, "perlfunc") which contain linkable C<=item>s.
96
97=item netscape
98
99 --netscape
100
101Use Netscape HTML directives when applicable.
102
103=item nonetscape
104
105 --nonetscape
106
107Do not use Netscape HTML directives (default).
108
109=item index
110
111 --index
112
113Generate an index at the top of the HTML file (default behaviour).
114
115=item noindex
116
117 --noindex
118
119Do not generate an index at the top of the HTML file.
120
121
122=item recurse
123
124 --recurse
125
126Recurse into subdirectories specified in podpath (default behaviour).
127
128=item norecurse
129
130 --norecurse
131
132Do not recurse into subdirectories specified in podpath.
133
134=item title
135
136 --title=title
137
138Specify the title of the resulting HTML file.
139
34db337b 140=item css
141
142 --css=stylesheet
143
144Specify the URL of a cascading style sheet.
145
54310121 146=item verbose
147
148 --verbose
149
150Display progress messages.
151
34db337b 152=item quiet
153
154 --quiet
155
156Don't display I<mostly harmless> warning messages.
157
54310121 158=back
159
160=head1 EXAMPLE
161
162 pod2html("pod2html",
163 "--podpath=lib:ext:pod:vms",
164 "--podroot=/usr/src/perl",
165 "--htmlroot=/perl/nmanual",
166 "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
167 "--recurse",
168 "--infile=foo.pod",
169 "--outfile=/perl/nmanual/foo.html");
170
34db337b 171=head1 ENVIRONMENT
172
173Uses $Config{pod2html} to setup default options.
174
54310121 175=head1 AUTHOR
176
177Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
178
54310121 179=head1 SEE ALSO
180
181L<perlpod>
182
183=head1 COPYRIGHT
184
185This program is distributed under the Artistic License.
186
187=cut
188
1f763251 189my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
190my $dircache = "pod2htmd$cache_ext";
191my $itemcache = "pod2htmi$cache_ext";
54310121 192
193my @begin_stack = (); # begin/end stack
194
29f227c9 195my @libpods = (); # files to search for links from C<> directives
196my $htmlroot = "/"; # http-server base directory from which all
54310121 197 # relative paths in $podpath stem.
5a039dd3 198my $htmldir = ""; # The directory to which the html pages
199 # will (eventually) be written.
54310121 200my $htmlfile = ""; # write to stdout by default
29f227c9 201my $htmlfileurl = "" ; # The url that other files would use to
5a039dd3 202 # refer to this file. This is only used
203 # to make relative urls that point to
204 # other files.
54310121 205my $podfile = ""; # read from stdin by default
206my @podpath = (); # list of directories containing library pods.
207my $podroot = "."; # filesystem base directory from which all
208 # relative paths in $podpath stem.
34db337b 209my $css = ''; # Cascading style sheet
54310121 210my $recurse = 1; # recurse on subdirectories in $podpath.
34db337b 211my $quiet = 0; # not quiet by default
54310121 212my $verbose = 0; # not verbose by default
213my $doindex = 1; # non-zero if we should generate an index
214my $listlevel = 0; # current list depth
2a28b791 215my @listend = (); # the text to use to end the list.
216my $after_lpar = 0; # set to true after a par in an =item
54310121 217my $ignore = 1; # whether or not to format text. we don't
218 # format text until we hit our first pod
219 # directive.
220
221my %items_named = (); # for the multiples of the same item in perlfunc
222my @items_seen = ();
223my $netscape = 0; # whether or not to use netscape directives.
224my $title; # title to give the pod(s)
34db337b 225my $header = 0; # produce block header/footer
54310121 226my $top = 1; # true if we are at the top of the doc. used
227 # to prevent the first <HR> directive.
228my $paragraph; # which paragraph we're processing (used
229 # for error messages)
2a28b791 230my $ptQuote = 0; # status of double-quote conversion
54310121 231my %pages = (); # associative array used to find the location
232 # of pages referenced by L<> links.
233my %sections = (); # sections within this page
234my %items = (); # associative array used to find the location
235 # of =item directives referenced by C<> links
2a28b791 236my %local_items = (); # local items - avoid destruction of %items
39e571d4 237my $Is83; # is dos with short filenames (8.3)
238
54310121 239sub init_globals {
04f720db 240$dircache = "pod2htmd$cache_ext";
241$itemcache = "pod2htmi$cache_ext";
54310121 242
243@begin_stack = (); # begin/end stack
244
245@libpods = (); # files to search for links from C<> directives
246$htmlroot = "/"; # http-server base directory from which all
247 # relative paths in $podpath stem.
67398a75 248$htmldir = ""; # The directory to which the html pages
249 # will (eventually) be written.
54310121 250$htmlfile = ""; # write to stdout by default
251$podfile = ""; # read from stdin by default
252@podpath = (); # list of directories containing library pods.
253$podroot = "."; # filesystem base directory from which all
254 # relative paths in $podpath stem.
34db337b 255$css = ''; # Cascading style sheet
54310121 256$recurse = 1; # recurse on subdirectories in $podpath.
34db337b 257$quiet = 0; # not quiet by default
54310121 258$verbose = 0; # not verbose by default
259$doindex = 1; # non-zero if we should generate an index
260$listlevel = 0; # current list depth
2a28b791 261@listend = (); # the text to use to end the list.
262$after_lpar = 0; # set to true after a par in an =item
54310121 263$ignore = 1; # whether or not to format text. we don't
264 # format text until we hit our first pod
265 # directive.
266
267@items_seen = ();
268%items_named = ();
269$netscape = 0; # whether or not to use netscape directives.
34db337b 270$header = 0; # produce block header/footer
54310121 271$title = ''; # title to give the pod(s)
272$top = 1; # true if we are at the top of the doc. used
273 # to prevent the first <HR> directive.
274$paragraph = ''; # which paragraph we're processing (used
275 # for error messages)
54310121 276%sections = (); # sections within this page
3e3baf6d 277
278# These are not reinitialised here but are kept as a cache.
279# See get_cache and related cache management code.
280#%pages = (); # associative array used to find the location
281 # of pages referenced by L<> links.
282#%items = (); # associative array used to find the location
54310121 283 # of =item directives referenced by C<> links
2a28b791 284%local_items = ();
39e571d4 285$Is83=$^O eq 'dos';
54310121 286}
287
2a28b791 288#
289# clean_data: global clean-up of pod data
290#
291sub clean_data($){
292 my( $dataref ) = @_;
293 my $i;
294 for( $i = 0; $i <= $#$dataref; $i++ ){
295 ${$dataref}[$i] =~ s/\s+\Z//;
296
297 # have a look for all-space lines
298 if( ${$dataref}[$i] =~ /^\s+$/m ){
299 my @chunks = split( /^\s+$/m, ${$dataref}[$i] );
300 splice( @$dataref, $i, 1, @chunks );
301 }
302 }
303}
304
305
54310121 306sub pod2html {
307 local(@ARGV) = @_;
308 local($/);
309 local $_;
310
311 init_globals();
312
39e571d4 313 $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
314
54310121 315 # cache of %pages and %items from last time we ran pod2html
54310121 316
317 #undef $opt_help if defined $opt_help;
318
319 # parse the command-line parameters
320 parse_command_line();
321
322 # set some variables to their default values if necessary
323 local *POD;
324 unless (@ARGV && $ARGV[0]) {
325 $podfile = "-" unless $podfile; # stdin
326 open(POD, "<$podfile")
327 || die "$0: cannot open $podfile file for input: $!\n";
328 } else {
329 $podfile = $ARGV[0]; # XXX: might be more filenames
330 *POD = *ARGV;
331 }
332 $htmlfile = "-" unless $htmlfile; # stdout
333 $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
29f227c9 334 $htmldir =~ s#/$## ; # so we don't get a //
335 if ( $htmlroot eq ''
336 && defined( $htmldir )
337 && $htmldir ne ''
338 && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
339 )
5a039dd3 340 {
29f227c9 341 # Set the 'base' url for this file, so that we can use it
342 # as the location from which to calculate relative links
343 # to other files. If this is '', then absolute links will
344 # be used throughout.
345 $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
5a039dd3 346 }
54310121 347
348 # read the pod a paragraph at a time
349 warn "Scanning for sections in input file(s)\n" if $verbose;
350 $/ = "";
351 my @poddata = <POD>;
352 close(POD);
2a28b791 353 clean_data( \@poddata );
54310121 354
355 # scan the pod for =head[1-6] directives and build an index
356 my $index = scan_headings(\%sections, @poddata);
357
3e3baf6d 358 unless($index) {
31e56455 359 warn "No headings in $podfile\n" if $verbose;
3e3baf6d 360 }
361
54310121 362 # open the output file
363 open(HTML, ">$htmlfile")
364 || die "$0: cannot open $htmlfile file for output: $!\n";
365
d011ffae 366 # put a title in the HTML file if one wasn't specified
367 if ($title eq '') {
368 TITLE_SEARCH: {
369 for (my $i = 0; $i < @poddata; $i++) {
370 if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
371 for my $para ( @poddata[$i, $i+1] ) {
372 last TITLE_SEARCH
373 if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
374 }
375 }
54310121 376
d011ffae 377 }
378 }
379 }
3e3baf6d 380 if (!$title and $podfile =~ /\.pod$/) {
381 # probably a split pod so take first =head[12] as title
382 for (my $i = 0; $i < @poddata; $i++) {
383 last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
384 }
385 warn "adopted '$title' as title for $podfile\n"
386 if $verbose and $title;
387 }
7b8d334a 388 if ($title) {
389 $title =~ s/\s*\(.*\)//;
390 } else {
34db337b 391 warn "$0: no title for $podfile" unless $quiet;
54310121 392 $podfile =~ /^(.*)(\.[^.\/]+)?$/;
393 $title = ($podfile eq "-" ? 'No Title' : $1);
3e3baf6d 394 warn "using $title" if $verbose;
54310121 395 }
34db337b 396 my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
397 $csslink =~ s,\\,/,g;
398 $csslink =~ s,(/.):,$1|,;
399
400 my $block = $header ? <<END_OF_BLOCK : '';
401<TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
402<TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
403<FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
404</TD></TR>
405</TABLE>
406END_OF_BLOCK
407
54310121 408 print HTML <<END_OF_HEAD;
7b8d334a 409<HTML>
410<HEAD>
34db337b 411<TITLE>$title</TITLE>$csslink
7b8d334a 412<LINK REV="made" HREF="mailto:$Config{perladmin}">
413</HEAD>
54310121 414
7b8d334a 415<BODY>
34db337b 416$block
54310121 417END_OF_HEAD
418
3e3baf6d 419 # load/reload/validate/cache %pages and %items
420 get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
54310121 421
422 # scan the pod for =item directives
2a28b791 423 scan_items( \%local_items, "", @poddata);
54310121 424
425 # put an index at the top of the file. note, if $doindex is 0 we
426 # still generate an index, but surround it with an html comment.
427 # that way some other program can extract it if desired.
428 $index =~ s/--+/-/g;
2a28b791 429 print HTML "<A NAME=\"__index__\"></A>\n";
54310121 430 print HTML "<!-- INDEX BEGIN -->\n";
431 print HTML "<!--\n" unless $doindex;
432 print HTML $index;
433 print HTML "-->\n" unless $doindex;
434 print HTML "<!-- INDEX END -->\n\n";
31e56455 435 print HTML "<HR>\n" if $doindex and $index;
54310121 436
437 # now convert this file
2a28b791 438 my $after_item; # set to true after an =item
439 warn "Converting input file $podfile\n" if $verbose;
440 foreach my $i (0..$#poddata){
441 $ptQuote = 0; # status of quote conversion
442
54310121 443 $_ = $poddata[$i];
444 $paragraph = $i+1;
445 if (/^(=.*)/s) { # is it a pod directive?
446 $ignore = 0;
2a28b791 447 $after_item = 0;
54310121 448 $_ = $1;
449 if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
450 process_begin($1, $2);
451 } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
452 process_end($1, $2);
453 } elsif (/^=cut/) { # =cut
454 process_cut();
455 } elsif (/^=pod/) { # =pod
456 process_pod();
457 } else {
458 next if @begin_stack && $begin_stack[-1] ne 'html';
459
7b8d334a 460 if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
2a28b791 461 process_head( $1, $2, $doindex && $index );
462 } elsif (/^=item\s*(.*\S)?/sm) { # =item text
463 warn "$0: $podfile: =item without bullet, number or text"
228a48a5 464 . " in paragraph $paragraph.\n" if !defined($1) or $1 eq '';
2a28b791 465 process_item( $1 );
466 $after_item = 1;
54310121 467 } elsif (/^=over\s*(.*)/) { # =over N
468 process_over();
469 } elsif (/^=back/) { # =back
470 process_back();
a45bd81d 471 } elsif (/^=for\s+(\S+)\s*(.*)/si) {# =for
54310121 472 process_for($1,$2);
473 } else {
474 /^=(\S*)\s*/;
475 warn "$0: $podfile: unknown pod directive '$1' in "
476 . "paragraph $paragraph. ignoring.\n";
477 }
478 }
479 $top = 0;
480 }
481 else {
482 next if $ignore;
483 next if @begin_stack && $begin_stack[-1] ne 'html';
484 my $text = $_;
2a28b791 485 if( $text =~ /\A\s+/ ){
486 process_pre( \$text );
487 print HTML "<PRE>\n$text</PRE>\n";
488
489 } else {
490 process_text( \$text );
491
492 # experimental: check for a paragraph where all lines
493 # have some ...\t...\t...\n pattern
494 if( $text =~ /\t/ ){
495 my @lines = split( "\n", $text );
496 if( @lines > 1 ){
497 my $all = 2;
498 foreach my $line ( @lines ){
499 if( $line =~ /\S/ && $line !~ /\t/ ){
500 $all--;
501 last if $all == 0;
502 }
503 }
504 if( $all > 0 ){
505 $text =~ s/\t+/<TD>/g;
506 $text =~ s/^/<TR><TD>/gm;
507 $text = '<TABLE CELLSPACING=0 CELLPADDING=0>' .
508 $text . '</TABLE>';
509 }
510 }
511 }
512 ## end of experimental
513
514 if( $after_item ){
515 print HTML "$text\n";
516 $after_lpar = 1;
517 } else {
518 print HTML "<P>$text</P>\n";
519 }
520 }
521 $after_item = 0;
54310121 522 }
523 }
524
525 # finish off any pending directives
526 finish_list();
2a28b791 527
528 # link to page index
529 print HTML "<P><A HREF=\"#__index__\"><SMALL>page index</SMALL></A></P>\n"
530 if $doindex and $index;
531
54310121 532 print HTML <<END_OF_TAIL;
34db337b 533$block
7b8d334a 534</BODY>
54310121 535
7b8d334a 536</HTML>
54310121 537END_OF_TAIL
538
539 # close the html file
540 close(HTML);
541
542 warn "Finished\n" if $verbose;
543}
544
545##############################################################################
546
547my $usage; # see below
548sub usage {
549 my $podfile = shift;
550 warn "$0: $podfile: @_\n" if @_;
551 die $usage;
552}
553
554$usage =<<END_OF_USAGE;
555Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
556 --podpath=<name>:...:<name> --podroot=<name>
557 --libpods=<name>:...:<name> --recurse --verbose --index
558 --netscape --norecurse --noindex
559
560 --flush - flushes the item and directory caches.
561 --help - prints this message.
562 --htmlroot - http-server base directory from which all relative paths
563 in podpath stem (default is /).
564 --index - generate an index at the top of the resulting html
565 (default).
566 --infile - filename for the pod to convert (input taken from stdin
567 by default).
568 --libpods - colon-separated list of pages to search for =item pod
569 directives in as targets of C<> and implicit links (empty
570 by default). note, these are not filenames, but rather
571 page names like those that appear in L<> links.
572 --netscape - will use netscape html directives when applicable.
573 --nonetscape - will not use netscape directives (default).
574 --outfile - filename for the resulting html file (output sent to
575 stdout by default).
576 --podpath - colon-separated list of directories containing library
577 pods. empty by default.
578 --podroot - filesystem base directory from which all relative paths
579 in podpath stem (default is .).
580 --noindex - don't generate an index at the top of the resulting html.
581 --norecurse - don't recurse on those subdirectories listed in podpath.
582 --recurse - recurse on those subdirectories listed in podpath
583 (default behavior).
584 --title - title that will appear in resulting html file.
34db337b 585 --header - produce block header/footer
586 --css - stylesheet URL
54310121 587 --verbose - self-explanatory
34db337b 588 --quiet - supress some benign warning messages
54310121 589
590END_OF_USAGE
591
592sub parse_command_line {
2a28b791 593 my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
34db337b 594 unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
54310121 595 my $result = GetOptions(
29f227c9 596 'flush' => \$opt_flush,
597 'help' => \$opt_help,
598 'htmldir=s' => \$opt_htmldir,
54310121 599 'htmlroot=s' => \$opt_htmlroot,
29f227c9 600 'index!' => \$opt_index,
54310121 601 'infile=s' => \$opt_infile,
602 'libpods=s' => \$opt_libpods,
603 'netscape!' => \$opt_netscape,
604 'outfile=s' => \$opt_outfile,
605 'podpath=s' => \$opt_podpath,
606 'podroot=s' => \$opt_podroot,
54310121 607 'recurse!' => \$opt_recurse,
608 'title=s' => \$opt_title,
34db337b 609 'header' => \$opt_header,
610 'css=s' => \$opt_css,
54310121 611 'verbose' => \$opt_verbose,
34db337b 612 'quiet' => \$opt_quiet,
54310121 613 );
614 usage("-", "invalid parameters") if not $result;
615
616 usage("-") if defined $opt_help; # see if the user asked for help
617 $opt_help = ""; # just to make -w shut-up.
618
619 $podfile = $opt_infile if defined $opt_infile;
620 $htmlfile = $opt_outfile if defined $opt_outfile;
67398a75 621 $htmldir = $opt_htmldir if defined $opt_htmldir;
54310121 622
623 @podpath = split(":", $opt_podpath) if defined $opt_podpath;
624 @libpods = split(":", $opt_libpods) if defined $opt_libpods;
625
626 warn "Flushing item and directory caches\n"
627 if $opt_verbose && defined $opt_flush;
628 unlink($dircache, $itemcache) if defined $opt_flush;
629
630 $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
631 $podroot = $opt_podroot if defined $opt_podroot;
632
633 $doindex = $opt_index if defined $opt_index;
634 $recurse = $opt_recurse if defined $opt_recurse;
635 $title = $opt_title if defined $opt_title;
34db337b 636 $header = defined $opt_header ? 1 : 0;
637 $css = $opt_css if defined $opt_css;
54310121 638 $verbose = defined $opt_verbose ? 1 : 0;
34db337b 639 $quiet = defined $opt_quiet ? 1 : 0;
54310121 640 $netscape = $opt_netscape if defined $opt_netscape;
641}
642
3e3baf6d 643
644my $saved_cache_key;
645
646sub get_cache {
647 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
648 my @cache_key_args = @_;
649
650 # A first-level cache:
651 # Don't bother reading the cache files if they still apply
652 # and haven't changed since we last read them.
653
654 my $this_cache_key = cache_key(@cache_key_args);
655
656 return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
657
658 # load the cache of %pages and %items if possible. $tests will be
659 # non-zero if successful.
660 my $tests = 0;
661 if (-f $dircache && -f $itemcache) {
662 warn "scanning for item cache\n" if $verbose;
663 $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
664 }
665
666 # if we didn't succeed in loading the cache then we must (re)build
667 # %pages and %items.
668 if (!$tests) {
669 warn "scanning directories in pod-path\n" if $verbose;
670 scan_podpath($podroot, $recurse, 0);
671 }
672 $saved_cache_key = cache_key(@cache_key_args);
673}
674
675sub cache_key {
676 my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
677 return join('!', $dircache, $itemcache, $recurse,
29f227c9 678 @$podpath, $podroot, stat($dircache), stat($itemcache));
3e3baf6d 679}
680
54310121 681#
3e3baf6d 682# load_cache - tries to find if the caches stored in $dircache and $itemcache
54310121 683# are valid caches of %pages and %items. if they are valid then it loads
684# them and returns a non-zero value.
685#
3e3baf6d 686sub load_cache {
54310121 687 my($dircache, $itemcache, $podpath, $podroot) = @_;
688 my($tests);
689 local $_;
690
691 $tests = 0;
692
693 open(CACHE, "<$itemcache") ||
694 die "$0: error opening $itemcache for reading: $!\n";
695 $/ = "\n";
696
697 # is it the same podpath?
698 $_ = <CACHE>;
699 chomp($_);
3e3baf6d 700 $tests++ if (join(":", @$podpath) eq $_);
54310121 701
702 # is it the same podroot?
703 $_ = <CACHE>;
704 chomp($_);
705 $tests++ if ($podroot eq $_);
706
707 # load the cache if its good
708 if ($tests != 2) {
709 close(CACHE);
54310121 710 return 0;
711 }
712
713 warn "loading item cache\n" if $verbose;
714 while (<CACHE>) {
715 /(.*?) (.*)$/;
716 $items{$1} = $2;
717 }
718 close(CACHE);
719
720 warn "scanning for directory cache\n" if $verbose;
721 open(CACHE, "<$dircache") ||
722 die "$0: error opening $dircache for reading: $!\n";
723 $/ = "\n";
724 $tests = 0;
725
726 # is it the same podpath?
727 $_ = <CACHE>;
728 chomp($_);
3e3baf6d 729 $tests++ if (join(":", @$podpath) eq $_);
54310121 730
731 # is it the same podroot?
732 $_ = <CACHE>;
733 chomp($_);
734 $tests++ if ($podroot eq $_);
735
736 # load the cache if its good
737 if ($tests != 2) {
738 close(CACHE);
54310121 739 return 0;
740 }
741
742 warn "loading directory cache\n" if $verbose;
743 while (<CACHE>) {
744 /(.*?) (.*)$/;
745 $pages{$1} = $2;
746 }
747
748 close(CACHE);
749
750 return 1;
751}
752
753#
754# scan_podpath - scans the directories specified in @podpath for directories,
755# .pod files, and .pm files. it also scans the pod files specified in
756# @libpods for =item directives.
757#
758sub scan_podpath {
3e3baf6d 759 my($podroot, $recurse, $append) = @_;
54310121 760 my($pwd, $dir);
761 my($libpod, $dirname, $pod, @files, @poddata);
762
3e3baf6d 763 unless($append) {
764 %items = ();
765 %pages = ();
766 }
767
54310121 768 # scan each directory listed in @podpath
769 $pwd = getcwd();
770 chdir($podroot)
771 || die "$0: error changing to directory $podroot: $!\n";
772 foreach $dir (@podpath) {
773 scan_dir($dir, $recurse);
774 }
775
776 # scan the pods listed in @libpods for =item directives
777 foreach $libpod (@libpods) {
778 # if the page isn't defined then we won't know where to find it
779 # on the system.
780 next unless defined $pages{$libpod} && $pages{$libpod};
781
782 # if there is a directory then use the .pod and .pm files within it.
29f227c9 783 # NOTE: Only finds the first so-named directory in the tree.
784# if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
785 if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 786 # find all the .pod and .pm files within the directory
787 $dirname = $1;
788 opendir(DIR, $dirname) ||
789 die "$0: error opening directory $dirname: $!\n";
790 @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
791 closedir(DIR);
792
793 # scan each .pod and .pm file for =item directives
794 foreach $pod (@files) {
795 open(POD, "<$dirname/$pod") ||
796 die "$0: error opening $dirname/$pod for input: $!\n";
797 @poddata = <POD>;
798 close(POD);
2a28b791 799 clean_data( \@poddata );
54310121 800
2a28b791 801 scan_items( \%items, "$dirname/$pod", @poddata);
54310121 802 }
803
804 # use the names of files as =item directives too.
2a28b791 805### Don't think this should be done this way - confuses issues.(WL)
806### foreach $pod (@files) {
807### $pod =~ /^(.*)(\.pod|\.pm)$/;
808### $items{$1} = "$dirname/$1.html" if $1;
809### }
54310121 810 } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
811 $pages{$libpod} =~ /([^:]*\.pm):/) {
812 # scan the .pod or .pm file for =item directives
813 $pod = $1;
814 open(POD, "<$pod") ||
815 die "$0: error opening $pod for input: $!\n";
816 @poddata = <POD>;
817 close(POD);
2a28b791 818 clean_data( \@poddata );
54310121 819
2a28b791 820 scan_items( \%items, "$pod", @poddata);
54310121 821 } else {
822 warn "$0: shouldn't be here (line ".__LINE__."\n";
823 }
824 }
825 @poddata = (); # clean-up a bit
826
827 chdir($pwd)
828 || die "$0: error changing to directory $pwd: $!\n";
829
830 # cache the item list for later use
831 warn "caching items for later use\n" if $verbose;
832 open(CACHE, ">$itemcache") ||
833 die "$0: error open $itemcache for writing: $!\n";
834
835 print CACHE join(":", @podpath) . "\n$podroot\n";
836 foreach my $key (keys %items) {
837 print CACHE "$key $items{$key}\n";
838 }
839
840 close(CACHE);
841
842 # cache the directory list for later use
843 warn "caching directories for later use\n" if $verbose;
844 open(CACHE, ">$dircache") ||
845 die "$0: error open $dircache for writing: $!\n";
846
847 print CACHE join(":", @podpath) . "\n$podroot\n";
848 foreach my $key (keys %pages) {
849 print CACHE "$key $pages{$key}\n";
850 }
851
852 close(CACHE);
853}
854
855#
856# scan_dir - scans the directory specified in $dir for subdirectories, .pod
857# files, and .pm files. notes those that it finds. this information will
858# be used later in order to figure out where the pages specified in L<>
859# links are on the filesystem.
860#
861sub scan_dir {
862 my($dir, $recurse) = @_;
863 my($t, @subdirs, @pods, $pod, $dirname, @dirs);
864 local $_;
865
866 @subdirs = ();
867 @pods = ();
868
869 opendir(DIR, $dir) ||
870 die "$0: error opening directory $dir: $!\n";
871 while (defined($_ = readdir(DIR))) {
872 if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
873 $pages{$_} = "" unless defined $pages{$_};
874 $pages{$_} .= "$dir/$_:";
875 push(@subdirs, $_);
876 } elsif (/\.pod$/) { # .pod
877 s/\.pod$//;
878 $pages{$_} = "" unless defined $pages{$_};
879 $pages{$_} .= "$dir/$_.pod:";
880 push(@pods, "$dir/$_.pod");
881 } elsif (/\.pm$/) { # .pm
882 s/\.pm$//;
883 $pages{$_} = "" unless defined $pages{$_};
884 $pages{$_} .= "$dir/$_.pm:";
885 push(@pods, "$dir/$_.pm");
886 }
887 }
888 closedir(DIR);
889
890 # recurse on the subdirectories if necessary
891 if ($recurse) {
892 foreach my $subdir (@subdirs) {
893 scan_dir("$dir/$subdir", $recurse);
894 }
895 }
896}
897
898#
899# scan_headings - scan a pod file for head[1-6] tags, note the tags, and
900# build an index.
901#
902sub scan_headings {
903 my($sections, @data) = @_;
2a28b791 904 my($tag, $which_head, $otitle, $listdepth, $index);
54310121 905
be173d55 906 # here we need local $ignore = 0;
907 # unfortunately, we can't have it, because $ignore is lexical
908 $ignore = 0;
909
54310121 910 $listdepth = 0;
911 $index = "";
912
913 # scan for =head directives, note their name, and build an index
914 # pointing to each of them.
915 foreach my $line (@data) {
bb9460ed 916 if ($line =~ /^=(head)([1-6])\s+(.*)/) {
2a28b791 917 ($tag, $which_head, $otitle) = ($1,$2,$3);
918
919 my $title = depod( $otitle );
920 my $name = htmlify( $title );
921 $$sections{$name} = 1;
922 $title = process_text( \$otitle );
54310121 923
102c538a 924 while ($which_head != $listdepth) {
925 if ($which_head > $listdepth) {
926 $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
927 $listdepth++;
928 } elsif ($which_head < $listdepth) {
929 $listdepth--;
930 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
931 }
54310121 932 }
54310121 933
934 $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
2a28b791 935 "<A HREF=\"#" . $name . "\">" .
936 $title . "</A></LI>";
54310121 937 }
938 }
939
940 # finish off the lists
941 while ($listdepth--) {
942 $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
943 }
944
945 # get rid of bogus lists
946 $index =~ s,\t*<UL>\s*</UL>\n,,g;
947
bb9460ed 948 $ignore = 1; # restore old value;
be173d55 949
54310121 950 return $index;
951}
952
953#
954# scan_items - scans the pod specified by $pod for =item directives. we
955# will use this information later on in resolving C<> links.
956#
957sub scan_items {
2a28b791 958 my( $itemref, $pod, @poddata ) = @_;
54310121 959 my($i, $item);
960 local $_;
961
962 $pod =~ s/\.pod$//;
963 $pod .= ".html" if $pod;
964
965 foreach $i (0..$#poddata) {
2a28b791 966 my $txt = depod( $poddata[$i] );
967
968 # figure out what kind of item it is.
969 # Build string for referencing this item.
970 if ( $txt =~ /\A=item\s+\*\s*(.*)\Z/s ) { # bullet
971 next unless $1;
972 $item = $1;
973 } elsif( $txt =~ /\A=item\s+(?>\d+\.?)\s*(.*)\Z/s ) { # numbered list
974 $item = $1;
975 } elsif( $txt =~ /\A=item\s+(.*)\Z/s ) { # plain item
976 $item = $1;
977 } else {
978 next;
54310121 979 }
2a28b791 980 my $fid = fragment_id( $item );
981 $$itemref{$fid} = "$pod" if $fid;
54310121 982 }
983}
984
985#
986# process_head - convert a pod head[1-6] tag and convert it to HTML format.
987#
988sub process_head {
2a28b791 989 my($tag, $heading, $hasindex) = @_;
54310121 990
991 # figure out the level of the =head
992 $tag =~ /head([1-6])/;
993 my $level = $1;
994
2a28b791 995 if( $listlevel ){
996 warn "$0: $podfile: unterminated list at =head in paragraph $paragraph. ignoring.\n";
997 while( $listlevel ){
998 process_back();
999 }
1000 }
1001
1002 print HTML "<P>\n";
1003 if( $level == 1 && ! $top ){
1004 print HTML "<A HREF=\"#__index__\"><SMALL>page index</SMALL></A>\n"
1005 if $hasindex;
1006 print HTML "<HR>\n"
1007 }
1008
1009 my $name = htmlify( depod( $heading ) );
1010 my $convert = process_text( \$heading );
1011 print HTML "<H$level><A NAME=\"$name\">$convert</A></H$level>\n";
54310121 1012}
1013
2a28b791 1014
54310121 1015#
2a28b791 1016# emit_item_tag - print an =item's text
1017# Note: The global $EmittedItem is used for inhibiting self-references.
54310121 1018#
2a28b791 1019my $EmittedItem;
1020
1021sub emit_item_tag($$$){
1022 my( $otext, $text, $compact ) = @_;
1023 my $item = fragment_id( $text );
54310121 1024
2a28b791 1025 $EmittedItem = $item;
1026 ### print STDERR "emit_item_tag=$item ($text)\n";
54310121 1027
2a28b791 1028 print HTML '<STRONG>';
1029 if ($items_named{$item}++) {
1030 print HTML process_text( \$otext );
1031 } else {
1032 my $name = 'item_' . $item;
1033 print HTML qq{<A NAME="$name">}, process_text( \$otext ), '</A>';
1034 }
1035 print HTML "</STRONG><BR>\n";
1036 undef( $EmittedItem );
1037}
1038
1039sub emit_li {
1040 my( $tag ) = @_;
1041 if( $items_seen[$listlevel]++ == 0 ){
1042 push( @listend, "</$tag>" );
1043 print HTML "<$tag>\n";
1044 }
1045 print HTML $tag eq 'DL' ? '<DT>' : '<LI>';
1046}
1047
1048#
1049# process_item - convert a pod item tag and convert it to HTML format.
1050#
1051sub process_item {
1052 my( $otext ) = @_;
54310121 1053
1054 # lots of documents start a list without doing an =over. this is
1055 # bad! but, the proper thing to do seems to be to just assume
1056 # they did do an =over. so warn them once and then continue.
2a28b791 1057 if( $listlevel == 0 ){
1058 warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n";
1059 process_over();
1060 }
54310121 1061
2a28b791 1062 # formatting: insert a paragraph if preceding item has >1 paragraph
1063 if( $after_lpar ){
1064 print HTML "<P></P>\n";
1065 $after_lpar = 0;
1066 }
54310121 1067
1068 # remove formatting instructions from the text
2a28b791 1069 my $text = depod( $otext );
1070
1071 # all the list variants:
1072 if( $text =~ /\A\*/ ){ # bullet
1073 emit_li( 'UL' );
1074 if ($text =~ /\A\*\s+(.+)\Z/s ) { # with additional text
1075 my $tag = $1;
1076 $otext =~ s/\A\*\s+//;
1077 emit_item_tag( $otext, $tag, 1 );
54310121 1078 }
1079
2a28b791 1080 } elsif( $text =~ /\A\d+/ ){ # numbered list
1081 emit_li( 'OL' );
1082 if ($text =~ /\A(?>\d+\.?)\s*(.+)\Z/s ) { # with additional text
1083 my $tag = $1;
1084 $otext =~ s/\A\d+\.?\s*//;
1085 emit_item_tag( $otext, $tag, 1 );
7b8d334a 1086 }
54310121 1087
2a28b791 1088 } else { # definition list
1089 emit_li( 'DL' );
1090 if ($text =~ /\A(.+)\Z/s ){ # should have text
1091 emit_item_tag( $otext, $text, 1 );
7b8d334a 1092 }
54310121 1093 print HTML '<DD>';
1094 }
54310121 1095 print HTML "\n";
1096}
1097
1098#
2a28b791 1099# process_over - process a pod over tag and start a corresponding HTML list.
54310121 1100#
1101sub process_over {
1102 # start a new list
1103 $listlevel++;
2a28b791 1104 push( @items_seen, 0 );
1105 $after_lpar = 0;
54310121 1106}
1107
1108#
1109# process_back - process a pod back tag and convert it to HTML format.
1110#
1111sub process_back {
2a28b791 1112 if( $listlevel == 0 ){
1113 warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n";
1114 return;
1115 }
54310121 1116
1117 # close off the list. note, I check to see if $listend[$listlevel] is
1118 # defined because an =item directive may have never appeared and thus
1119 # $listend[$listlevel] may have never been initialized.
1120 $listlevel--;
2a28b791 1121 if( defined $listend[$listlevel] ){
1122 print HTML '<P></P>' if $after_lpar;
1123 print HTML $listend[$listlevel];
1124 print HTML "\n";
1125 pop( @listend );
1126 }
1127 $after_lpar = 0;
54310121 1128
2a28b791 1129 # clean up item count
1130 pop( @items_seen );
54310121 1131}
1132
1133#
2a28b791 1134# process_cut - process a pod cut tag, thus start ignoring pod directives.
54310121 1135#
1136sub process_cut {
1137 $ignore = 1;
1138}
1139
1140#
2a28b791 1141# process_pod - process a pod pod tag, thus stop ignoring pod directives
1142# until we see a corresponding cut.
54310121 1143#
1144sub process_pod {
1145 # no need to set $ignore to 0 cause the main loop did it
1146}
1147
1148#
2a28b791 1149# process_for - process a =for pod tag. if it's for html, spit
c4d9b39d 1150# it out verbatim, if illustration, center it, otherwise ignore it.
54310121 1151#
1152sub process_for {
1153 my($whom, $text) = @_;
1154 if ( $whom =~ /^(pod2)?html$/i) {
1155 print HTML $text;
c4d9b39d 1156 } elsif ($whom =~ /^illustration$/i) {
1157 1 while chomp $text;
1158 for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
1159 $text .= $ext, last if -r "$text$ext";
1160 }
1161 print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
1162 }
54310121 1163}
1164
1165#
1166# process_begin - process a =begin pod tag. this pushes
1167# whom we're beginning on the begin stack. if there's a
1168# begin stack, we only print if it us.
1169#
1170sub process_begin {
1171 my($whom, $text) = @_;
1172 $whom = lc($whom);
1173 push (@begin_stack, $whom);
1174 if ( $whom =~ /^(pod2)?html$/) {
1175 print HTML $text if $text;
1176 }
1177}
1178
1179#
1180# process_end - process a =end pod tag. pop the
1181# begin stack. die if we're mismatched.
1182#
1183sub process_end {
1184 my($whom, $text) = @_;
1185 $whom = lc($whom);
1186 if ($begin_stack[-1] ne $whom ) {
1187 die "Unmatched begin/end at chunk $paragraph\n"
1188 }
2a28b791 1189 pop( @begin_stack );
54310121 1190}
1191
1192#
2a28b791 1193# process_pre - indented paragraph, made into <PRE></PRE>
54310121 1194#
2a28b791 1195sub process_pre {
1196 my( $text ) = @_;
1197 my( $rest );
54310121 1198 return if $ignore;
1199
54310121 1200 $rest = $$text;
1201
2a28b791 1202 # insert spaces in place of tabs
1203 $rest =~ s#.*#
be173d55 1204 my $line = $&;
1205 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
1206 $line;
1207 #eg;
54310121 1208
2a28b791 1209 # convert some special chars to HTML escapes
1210 $rest =~ s/&/&amp;/g;
1211 $rest =~ s/</&lt;/g;
1212 $rest =~ s/>/&gt;/g;
1213 $rest =~ s/"/&quot;/g;
1214
1215 # try and create links for all occurrences of perl.* within
1216 # the preformatted text.
1217 $rest =~ s{
1218 (\s*)(perl\w+)
1219 }{
1220 if ( defined $pages{$2} ){ # is a link
1221 qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
1222 } elsif (defined $pages{dosify($2)}) { # is a link
1223 qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
1224 } else {
1225 "$1$2";
1226 }
1227 }xeg;
1228 $rest =~ s{
1229 (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
1230 }{
1231 my $url ;
1232 if ( $htmlfileurl ne '' ){
1233 # Here, we take advantage of the knowledge
1234 # that $htmlfileurl ne '' implies $htmlroot eq ''.
1235 # Since $htmlroot eq '', we need to prepend $htmldir
1236 # on the fron of the link to get the absolute path
1237 # of the link's target. We check for a leading '/'
1238 # to avoid corrupting links that are #, file:, etc.
1239 my $old_url = $3 ;
1240 $old_url = "$htmldir$old_url" if $old_url =~ m{^\/};
1241 $url = relativize_url( "$old_url.html", $htmlfileurl );
1242 } else {
1243 $url = "$3.html" ;
1244 }
1245 "$1$url" ;
1246 }xeg;
1247
1248 # Look for embedded URLs and make them into links. We don't
1249 # relativize them since they are best left as the author intended.
1250
1251 my $urls = '(' . join ('|', qw{
54310121 1252 http
1253 telnet
1254 mailto
1255 news
1256 gopher
1257 file
1258 wais
1259 ftp
1260 } )
1261 . ')';
1262
2a28b791 1263 my $ltrs = '\w';
1264 my $gunk = '/#~:.?+=&%@!\-';
1265 my $punc = '.:?\-';
1266 my $any = "${ltrs}${gunk}${punc}";
54310121 1267
2a28b791 1268 $rest =~ s{
54310121 1269 \b # start at word boundary
1270 ( # begin $1 {
29f227c9 1271 $urls : # need resource and a colon
1272 (?!:) # Ignore File::, among others.
54310121 1273 [$any] +? # followed by on or more
1274 # of any valid character, but
1275 # be conservative and take only
1276 # what you need to....
1277 ) # end $1 }
1278 (?= # look-ahead non-consumptive assertion
1279 [$punc]* # either 0 or more puntuation
1280 [^$any] # followed by a non-url char
1281 | # or else
1282 $ # then end of the string
1283 )
1284 }{<A HREF="$1">$1</A>}igox;
1285
2a28b791 1286 # text should be as it is (verbatim)
1287 $$text = $rest;
1288}
54310121 1289
54310121 1290
2a28b791 1291#
1292# pure text processing
1293#
1294# pure_text/inIS_text: differ with respect to automatic C<> recognition.
1295# we don't want this to happen within IS
1296#
1297sub pure_text($){
1298 my $text = shift();
1299 process_puretext( $text, \$ptQuote, 1 );
54310121 1300}
1301
2a28b791 1302sub inIS_text($){
1303 my $text = shift();
1304 process_puretext( $text, \$ptQuote, 0 );
1305}
54310121 1306
1307#
1308# process_puretext - process pure text (without pod-escapes) converting
1309# double-quotes and handling implicit C<> links.
1310#
1311sub process_puretext {
2a28b791 1312 my($text, $quote, $notinIS) = @_;
54310121 1313
2a28b791 1314 ## Guessing at func() or [$@%&]*var references in plain text is destined
1315 ## to produce some strange looking ref's. uncomment to disable:
1316 ## $notinIS = 0;
1317
1318 my(@words, $lead, $trail);
54310121 1319
2a28b791 1320 # convert double-quotes to single-quotes
1321 if( $$quote && $text =~ s/"/''/s ){
1322 $$quote = 0;
1323 }
1324 while ($text =~ s/"([^"]*)"/``$1''/sg) {};
1325 $$quote = 1 if $text =~ s/"/``/s;
54310121 1326
1327 # keep track of leading and trailing white-space
2a28b791 1328 $lead = ($text =~ s/\A(\s+)//s ? $1 : "");
1329 $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");
54310121 1330
2a28b791 1331 # split at space/non-space boundaries
1332 @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );
54310121 1333
1334 # process each word individually
1335 foreach my $word (@words) {
2a28b791 1336 # skip space runs
1337 next if $word =~ /^\s*$/;
54310121 1338 # see if we can infer a link
67398a75 1339 if( $notinIS && $word =~ /^(\w+)\((.*)\)\W*$/ ) {
54310121 1340 # has parenthesis so should have been a C<> ref
2a28b791 1341 ## try for a pagename (perlXXX(1))?
1342 if( $2 =~ /^\d+$/ ){
1343 my $url = page_sect( $word, '' );
1344 if( defined $url ){
1345 $word = "<A HREF=\"$url\">the $word manpage</A>";
1346 next;
1347 }
1348 }
1349 $word = emit_C( $word );
1350
1351#### disabled. either all (including $\W, $\w+{.*} etc.) or nothing.
1352## } elsif( $notinIS && $word =~ /^[\$\@%&*]+\w+$/) {
1353## # perl variables, should be a C<> ref
1354## $word = emit_C( $word );
1355
54310121 1356 } elsif ($word =~ m,^\w+://\w,) {
1357 # looks like a URL
5a039dd3 1358 # Don't relativize it: leave it as the author intended
54310121 1359 $word = qq(<A HREF="$word">$word</A>);
af47ee55 1360 } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
54310121 1361 # looks like an e-mail address
7b8d334a 1362 my ($w1, $w2, $w3) = ("", $word, "");
1363 ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
1364 ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
1365 $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
54310121 1366 } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
7b8d334a 1367 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1368 $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
1369 } else {
7b8d334a 1370 $word = html_escape($word) if $word =~ /["&<>]/;
54310121 1371 }
1372 }
1373
2a28b791 1374 # put everything back together
1375 return $lead . join( '', @words ) . $trail;
1376}
1377
54310121 1378
2a28b791 1379#
1380# process_text - handles plaintext that appears in the input pod file.
1381# there may be pod commands embedded within the text so those must be
1382# converted to html commands.
1383#
7ba65c74 1384
1385sub process_text1($$;$);
1386
2a28b791 1387sub process_text {
1388 return if $ignore;
1389 my( $tref ) = @_;
1390 my $res = process_text1( 0, $tref );
1391 $$tref = $res;
1392}
1393
1394sub process_text1($$;$){
1395 my( $lev, $rstr, $func ) = @_;
1396 $lev++ unless defined $func;
1397 my $res = '';
1398
228a48a5 1399 $func ||= '';
2a28b791 1400 if( $func eq 'B' ){
1401 # B<text> - boldface
1402 $res = '<STRONG>' . process_text1( $lev, $rstr ) . '</STRONG>';
1403
1404 } elsif( $func eq 'C' ){
1405 # C<code> - can be a ref or <CODE></CODE>
1406 # need to extract text
1407 my $par = go_ahead( $rstr, 'C' );
1408
1409 ## clean-up of the link target
1410 my $text = depod( $par );
1411
1412 ### my $x = $par =~ /[BI]</ ? 'yes' : 'no' ;
1413 ### print STDERR "-->call emit_C($par) lev=$lev, par with BI=$x\n";
1414
1415 $res = emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1416
1417 } elsif( $func eq 'E' ){
1418 # E<x> - convert to character
1419 $$rstr =~ s/^(\w+)>//;
1420 $res = "&$1;";
1421
1422 } elsif( $func eq 'F' ){
1423 # F<filename> - italizice
1424 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1425
1426 } elsif( $func eq 'I' ){
1427 # I<text> - italizice
1428 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1429
1430 } elsif( $func eq 'L' ){
1431 # L<link> - link
1432 ## L<text|cross-ref> => produce text, use cross-ref for linking
1433 ## L<cross-ref> => make text from cross-ref
1434 ## need to extract text
1435 my $par = go_ahead( $rstr, 'L' );
1436
1437 # some L<>'s that shouldn't be:
1438 # a) full-blown URL's are emitted as-is
1439 if( $par =~ m{^\w+://}s ){
1440 return make_URL_href( $par );
1441 }
1442 # b) C<...> is stripped and treated as C<>
1443 if( $par =~ /^C<(.*)>$/ ){
1444 my $text = depod( $1 );
1445 return emit_C( $text, $lev > 1 || ($par =~ /[BI]</) );
1446 }
1447
1448 # analyze the contents
1449 $par =~ s/\n/ /g; # undo word-wrapped tags
1450 my $opar = $par;
1451 my $linktext;
1452 if( $par =~ s{^([^|]+)\|}{} ){
1453 $linktext = $1;
1454 }
1455
1456 # make sure sections start with a /
1457 $par =~ s{^"}{/"};
1458
1459 my( $page, $section, $ident );
1460
1461 # check for link patterns
1462 if( $par =~ m{^([^/]+?)/(?!")(.*?)$} ){ # name/ident
1463 # we've got a name/ident (no quotes)
1464 ( $page, $ident ) = ( $1, $2 );
1465 ### print STDERR "--> L<$par> to page $page, ident $ident\n";
1466
1467 } elsif( $par =~ m{^(.*?)/"?(.*?)"?$} ){ # [name]/"section"
1468 # even though this should be a "section", we go for ident first
1469 ( $page, $ident ) = ( $1, $2 );
1470 ### print STDERR "--> L<$par> to page $page, section $section\n";
1471
1472 } elsif( $par =~ /\s/ ){ # this must be a section with missing quotes
1473 ( $page, $section ) = ( '', $par );
1474 ### print STDERR "--> L<$par> to void page, section $section\n";
1475
1476 } else {
1477 ( $page, $section ) = ( $par, '' );
1478 ### print STDERR "--> L<$par> to page $par, void section\n";
1479 }
1480
1481 # now, either $section or $ident is defined. the convoluted logic
1482 # below tries to resolve L<> according to what the user specified.
1483 # failing this, we try to find the next best thing...
1484 my( $url, $ltext, $fid );
1485
1486 RESOLVE: {
1487 if( defined $ident ){
1488 ## try to resolve $ident as an item
1489 ( $url, $fid ) = coderef( $page, $ident );
1490 if( $url ){
1491 if( ! defined( $linktext ) ){
1492 $linktext = $ident;
1493 $linktext .= " in " if $ident && $page;
1494 $linktext .= "the $page manpage" if $page;
1495 }
1496 ### print STDERR "got coderef url=$url\n";
1497 last RESOLVE;
1498 }
1499 ## no luck: go for a section (auto-quoting!)
1500 $section = $ident;
1501 }
1502 ## now go for a section
1503 my $htmlsection = htmlify( $section );
1504 $url = page_sect( $page, $htmlsection );
1505 if( $url ){
1506 if( ! defined( $linktext ) ){
1507 $linktext = $section;
1508 $linktext .= " in " if $section && $page;
1509 $linktext .= "the $page manpage" if $page;
1510 }
1511 ### print STDERR "got page/section url=$url\n";
1512 last RESOLVE;
1513 }
1514 ## no luck: go for an ident
1515 if( $section ){
1516 $ident = $section;
1517 } else {
1518 $ident = $page;
1519 $page = undef();
1520 }
1521 ( $url, $fid ) = coderef( $page, $ident );
1522 if( $url ){
1523 if( ! defined( $linktext ) ){
1524 $linktext = $ident;
1525 $linktext .= " in " if $ident && $page;
1526 $linktext .= "the $page manpage" if $page;
1527 }
1528 ### print STDERR "got section=>coderef url=$url\n";
1529 last RESOLVE;
1530 }
1531
1532 # warning; show some text.
1533 $linktext = $opar unless defined $linktext;
1534 warn "$0: $podfile: cannot resolve L<$opar> in paragraph $paragraph.";
1535 }
1536
1537 # now we have an URL or just plain code
1538 $$rstr = $linktext . '>' . $$rstr;
1539 if( defined( $url ) ){
1540 $res = "<A HREF=\"$url\">" . process_text1( $lev, $rstr ) . '</A>';
1541 } else {
1542 $res = '<EM>' . process_text1( $lev, $rstr ) . '</EM>';
1543 }
1544
1545 } elsif( $func eq 'S' ){
1546 # S<text> - non-breaking spaces
1547 $res = process_text1( $lev, $rstr );
1548 $res =~ s/ /&nbsp;/g;
1549
1550 } elsif( $func eq 'X' ){
1551 # X<> - ignore
1552 $$rstr =~ s/^[^>]*>//;
1553
1554 } elsif( $func eq 'Z' ){
1555 # Z<> - empty
1556 warn "$0: $podfile: invalid X<> in paragraph $paragraph."
1557 unless $$rstr =~ s/^>//;
1558
1559 } else {
1560 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
1561 # all others: either recurse into new function or
1562 # terminate at closing angle bracket
1563 my $pt = $1;
1564 $pt .= '>' if $2 eq '>' && $lev == 1;
1565 $res .= $lev == 1 ? pure_text( $pt ) : inIS_text( $pt );
1566 return $res if $2 eq '>' && $lev > 1;
1567 if( $2 ne '>' ){
1568 $res .= process_text1( $lev, $rstr, substr($2,0,1) );
1569 }
1570
1571 }
1572 if( $lev == 1 ){
1573 $res .= pure_text( $$rstr );
54310121 1574 } else {
2a28b791 1575 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
54310121 1576 }
1577 }
2a28b791 1578 return $res;
1579}
54310121 1580
2a28b791 1581#
1582# go_ahead: extract text of an IS (can be nested)
1583#
1584sub go_ahead($$){
1585 my( $rstr, $func ) = @_;
1586 my $res = '';
1587 my $level = 1;
1588 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)//s ){
1589 $res .= $1;
1590 if( $2 eq '>' ){
1591 return $res if --$level == 0;
1592 } else {
1593 ++$level;
1594 }
1595 $res .= $2;
1596 }
1597 warn "$0: $podfile: undelimited $func<> in paragraph $paragraph.";
1598 return $res;
54310121 1599}
1600
1601#
2a28b791 1602# emit_C - output result of C<text>
1603# $text is the depod-ed text
54310121 1604#
2a28b791 1605sub emit_C($;$){
1606 my( $text, $nocode ) = @_;
1607 my $res;
1608 my( $url, $fid ) = coderef( undef(), $text );
1609
1610 # need HTML-safe text
1611 my $linktext = html_escape( $text );
1612
1613 if( defined( $url ) &&
1614 (!defined( $EmittedItem ) || $EmittedItem ne $fid ) ){
1615 $res = "<A HREF=\"$url\"><CODE>$linktext</CODE></A>";
1616 } elsif( 0 && $nocode ){
1617 $res = $linktext;
1618 } else {
1619 $res = "<CODE>$linktext</CODE>";
1620 }
1621 return $res;
54310121 1622}
1623
1624#
2a28b791 1625# html_escape: make text safe for HTML
1626#
1627sub html_escape {
1628 my $rest = $_[0];
1629 $rest =~ s/&/&amp;/g;
1630 $rest =~ s/</&lt;/g;
1631 $rest =~ s/>/&gt;/g;
1632 $rest =~ s/"/&quot;/g;
1633 return $rest;
1634}
1635
1636
1637#
39e571d4 1638# dosify - convert filenames to 8.3
1639#
1640sub dosify {
1641 my($str) = @_;
fe4c6be1 1642 return lc($str) if $^O eq 'VMS'; # VMS just needs casing
39e571d4 1643 if ($Is83) {
1644 $str = lc $str;
1645 $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
1646 $str =~ s/(\w+)/substr ($1,0,8)/ge;
1647 }
1648 return $str;
1649}
1650
1651#
2a28b791 1652# page_sect - make an URL from the text of a L<>
54310121 1653#
2a28b791 1654sub page_sect($$) {
1655 my( $page, $section ) = @_;
1656 my( $linktext, $page83, $link); # work strings
1657
1658 # check if we know that this is a section in this page
1659 if (!defined $pages{$page} && defined $sections{$page}) {
1660 $section = $page;
1661 $page = "";
1662 ### print STDERR "reset page='', section=$section\n";
54310121 1663 }
1664
39e571d4 1665 $page83=dosify($page);
1666 $page=$page83 if (defined $pages{$page83});
54310121 1667 if ($page eq "") {
2a28b791 1668 $link = "#" . htmlify( $section );
350ccacd 1669 } elsif ( $page =~ /::/ ) {
350ccacd 1670 $page =~ s,::,/,g;
29f227c9 1671 # Search page cache for an entry keyed under the html page name,
1672 # then look to see what directory that page might be in. NOTE:
1673 # this will only find one page. A better solution might be to produce
1674 # an intermediate page that is an index to all such pages.
1675 my $page_name = $page ;
1676 $page_name =~ s,^.*/,, ;
1677 if ( defined( $pages{ $page_name } ) &&
1678 $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
1679 ) {
1680 $page = $1 ;
1681 }
1682 else {
1683 # NOTE: This branch assumes that all A::B pages are located in
1684 # $htmlroot/A/B.html . This is often incorrect, since they are
1685 # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
1686 # analyze the contents of %pages and figure out where any
1687 # cousins of A::B are, then assume that. So, if A::B isn't found,
1688 # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
1689 # lib/A/B.pm. This is also limited, but it's an improvement.
1690 # Maybe a hints file so that the links point to the correct places
2a28b791 1691 # nonetheless?
1692
29f227c9 1693 }
350ccacd 1694 $link = "$htmlroot/$page.html";
2a28b791 1695 $link .= "#" . htmlify( $section ) if ($section);
54310121 1696 } elsif (!defined $pages{$page}) {
54310121 1697 $link = "";
54310121 1698 } else {
2a28b791 1699 $section = htmlify( $section ) if $section ne "";
1700 ### print STDERR "...section=$section\n";
54310121 1701
1702 # if there is a directory by the name of the page, then assume that an
1703 # appropriate section will exist in the subdirectory
29f227c9 1704# if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
1705 if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
54310121 1706 $link = "$htmlroot/$1/$section.html";
2a28b791 1707 ### print STDERR "...link=$link\n";
54310121 1708
1709 # since there is no directory by the name of the page, the section will
1710 # have to exist within a .html of the same name. thus, make sure there
1711 # is a .pod or .pm that might become that .html
1712 } else {
2a28b791 1713 $section = "#$section" if $section;
1714 ### print STDERR "...section=$section\n";
1715
54310121 1716 # check if there is a .pod with the page name
1717 if ($pages{$page} =~ /([^:]*)\.pod:/) {
1718 $link = "$htmlroot/$1.html$section";
1719 } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
1720 $link = "$htmlroot/$1.html$section";
1721 } else {
54310121 1722 $link = "";
54310121 1723 }
1724 }
1725 }
1726
54310121 1727 if ($link) {
29f227c9 1728 # Here, we take advantage of the knowledge that $htmlfileurl ne ''
1729 # implies $htmlroot eq ''. This means that the link in question
1730 # needs a prefix of $htmldir if it begins with '/'. The test for
1731 # the initial '/' is done to avoid '#'-only links, and to allow
1732 # for other kinds of links, like file:, ftp:, etc.
1733 my $url ;
1734 if ( $htmlfileurl ne '' ) {
2a28b791 1735 $link = "$htmldir$link" if $link =~ m{^/};
1736 $url = relativize_url( $link, $htmlfileurl );
1737# print( " b: [$link,$htmlfileurl,$url]\n" );
29f227c9 1738 }
1739 else {
1740 $url = $link ;
1741 }
2a28b791 1742 return $url;
29f227c9 1743
54310121 1744 } else {
2a28b791 1745 return undef();
54310121 1746 }
54310121 1747}
1748
1749#
29f227c9 1750# relativize_url - convert an absolute URL to one relative to a base URL.
1751# Assumes both end in a filename.
1752#
1753sub relativize_url {
1754 my ($dest,$source) = @_ ;
1755
1756 my ($dest_volume,$dest_directory,$dest_file) =
1757 File::Spec::Unix->splitpath( $dest ) ;
1758 $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
1759
1760 my ($source_volume,$source_directory,$source_file) =
1761 File::Spec::Unix->splitpath( $source ) ;
1762 $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
1763
1764 my $rel_path = '' ;
1765 if ( $dest ne '' ) {
1766 $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
1767 }
1768
1769 if ( $rel_path ne '' &&
1770 substr( $rel_path, -1 ) ne '/' &&
1771 substr( $dest_file, 0, 1 ) ne '#'
1772 ) {
1773 $rel_path .= "/$dest_file" ;
1774 }
1775 else {
1776 $rel_path .= "$dest_file" ;
1777 }
1778
1779 return $rel_path ;
1780}
1781
54310121 1782
1783#
2a28b791 1784# coderef - make URL from the text of a C<>
54310121 1785#
2a28b791 1786sub coderef($$){
1787 my( $page, $item ) = @_;
1788 my( $url );
1789
1790 my $fid = fragment_id( $item );
2a28b791 1791 if( defined( $page ) ){
1792 # we have been given a $page...
1793 $page =~ s{::}{/}g;
1794
1795 # Do we take it? Item could be a section!
228a48a5 1796 my $base = $items{$fid} || "";
2a28b791 1797 $base =~ s{[^/]*/}{};
1798 if( $base ne "$page.html" ){
1799 ### print STDERR "coderef( $page, $item ): items{$fid} = $items{$fid} = $base => discard page!\n";
1800 $page = undef();
1801 }
54310121 1802
2a28b791 1803 } else {
1804 # no page - local items precede cached items
67398a75 1805 if( defined( $fid ) ){
1806 if( exists $local_items{$fid} ){
1807 $page = $local_items{$fid};
1808 } else {
1809 $page = $items{$fid};
1810 }
2a28b791 1811 }
1812 }
54310121 1813
1814 # if there was a pod file that we found earlier with an appropriate
1815 # =item directive, then create a link to that page.
2a28b791 1816 if( defined $page ){
1817 if( $page ){
228a48a5 1818 if( exists $pages{$page} and $pages{$page} =~ /([^:.]*)\.[^:]*:/){
2a28b791 1819 $page = $1 . '.html';
29f227c9 1820 }
2a28b791 1821 my $link = "$htmlroot/$page#item_$fid";
54310121 1822
2a28b791 1823 # Here, we take advantage of the knowledge that $htmlfileurl
1824 # ne '' implies $htmlroot eq ''.
1825 if ( $htmlfileurl ne '' ) {
1826 $link = "$htmldir$link" ;
1827 $url = relativize_url( $link, $htmlfileurl ) ;
1828 } else {
1829 $url = $link ;
1830 }
1831 } else {
1832 $url = "#item_" . $fid;
1833 }
54310121 1834
2a28b791 1835 confess "url has space: $url" if $url =~ /"[^"]*\s[^"]*"/;
1836 }
1837 return( $url, $fid );
54310121 1838}
1839
54310121 1840
1841
1842#
29f227c9 1843# Adapted from Nick Ing-Simmons' PodToHtml package.
1844sub relative_url {
1845 my $source_file = shift ;
1846 my $destination_file = shift;
1847
1848 my $source = URI::file->new_abs($source_file);
1849 my $uo = URI::file->new($destination_file,$source)->abs;
1850 return $uo->rel->as_string;
1851}
1852
1853
1854#
54310121 1855# finish_list - finish off any pending HTML lists. this should be called
1856# after the entire pod file has been read and converted.
1857#
1858sub finish_list {
7b8d334a 1859 while ($listlevel > 0) {
54310121 1860 print HTML "</DL>\n";
1861 $listlevel--;
1862 }
1863}
1864
1865#
1866# htmlify - converts a pod section specification to a suitable section
2a28b791 1867# specification for HTML. Note that we keep spaces and special characters
1868# except ", ? (Netscape problem) and the hyphen (writer's problem...).
54310121 1869#
1870sub htmlify {
2a28b791 1871 my( $heading) = @_;
1872 $heading =~ s/(\s+)/ /g;
1873 $heading =~ s/\s+\Z//;
1874 $heading =~ s/\A\s+//;
1875 # The hyphen is a disgrace to the English language.
1876 $heading =~ s/[-"?]//g;
1877 $heading = lc( $heading );
1878 return $heading;
1879}
54310121 1880
2a28b791 1881#
1882# depod - convert text by eliminating all interior sequences
1883# Note: can be called with copy or modify semantics
1884#
1885my %E2c;
67398a75 1886$E2c{lt} = '<';
1887$E2c{gt} = '>';
1888$E2c{sol} = '/';
2a28b791 1889$E2c{verbar} = '|';
67398a75 1890$E2c{amp} = '&'; # in Tk's pods
2a28b791 1891
7ba65c74 1892sub depod1($;$);
1893
2a28b791 1894sub depod($){
1895 my $string;
1896 if( ref( $_[0] ) ){
1897 $string = ${$_[0]};
1898 ${$_[0]} = depod1( \$string );
1899 } else {
1900 $string = $_[0];
1901 depod1( \$string );
1902 }
1903}
54310121 1904
2a28b791 1905sub depod1($;$){
1906 my( $rstr, $func ) = @_;
1907 my $res = '';
228a48a5 1908 return $res unless defined $$rstr;
2a28b791 1909 if( ! defined( $func ) ){
1910 # skip to next begin of an interior sequence
1911 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ])<// ){
1912 # recurse into its text
1913 $res .= $1 . depod1( $rstr, $2 );
1914 }
1915 $res .= $$rstr;
1916 } elsif( $func eq 'E' ){
1917 # E<x> - convert to character
1918 $$rstr =~ s/^(\w+)>//;
228a48a5 1919 $res .= $E2c{$1} || "";
2a28b791 1920 } elsif( $func eq 'X' ){
1921 # X<> - ignore
1922 $$rstr =~ s/^[^>]*>//;
1923 } elsif( $func eq 'Z' ){
1924 # Z<> - empty
1925 $$rstr =~ s/^>//;
1926 } else {
1927 # all others: either recurse into new function or
1928 # terminate at closing angle bracket
1929 while( $$rstr =~ s/\A(.*?)([BCEFILSXZ]<|>)// ){
1930 $res .= $1;
1931 last if $2 eq '>';
1932 $res .= depod1( $rstr, substr($2,0,1) );
1933 }
1934 ## If we're here and $2 ne '>': undelimited interior sequence.
1935 ## Ignored, as this is called without proper indication of where we are.
1936 ## Rely on process_text to produce diagnostics.
1937 }
1938 return $res;
1939}
54310121 1940
2a28b791 1941#
1942# fragment_id - construct a fragment identifier from:
1943# a) =item text
1944# b) contents of C<...>
1945#
1946my @hc;
1947sub fragment_id {
1948 my $text = shift();
1949 $text =~ s/\s+\Z//s;
1950 if( $text ){
1951 # a method or function?
1952 return $1 if $text =~ /(\w+)\s*\(/;
1953 return $1 if $text =~ /->\s*(\w+)\s*\(?/;
1954
1955 # a variable name?
1956 return $1 if $text =~ /^([$@%*]\S+)/;
1957
1958 # some pattern matching operator?
1959 return $1 if $text =~ m|^(\w+/).*/\w*$|;
1960
1961 # fancy stuff... like "do { }"
1962 return $1 if $text =~ m|^(\w+)\s*{.*}$|;
1963
1964 # honour the perlfunc manpage: func [PAR[,[ ]PAR]...]
1965 # and some funnies with ... Module ...
1966 return $1 if $text =~ m{^([a-z\d]+)(\s+[A-Z\d,/& ]+)?$};
1967 return $1 if $text =~ m{^([a-z\d]+)\s+Module(\s+[A-Z\d,/& ]+)?$};
1968
1969 # text? normalize!
1970 $text =~ s/\s+/_/sg;
1971 $text =~ s{(\W)}{
1972 defined( $hc[ord($1)] ) ? $hc[ord($1)]
1973 : ( $hc[ord($1)] = sprintf( "%%%02X", ord($1) ) ) }gxe;
1974 $text = substr( $text, 0, 50 );
1975 } else {
1976 return undef();
1977 }
54310121 1978}
1979
2a28b791 1980#
1981# make_URL_href - generate HTML href from URL
1982# Special treatment for CGI queries.
1983#
1984sub make_URL_href($){
1985 my( $url ) = @_;
1986 if( $url !~
228a48a5 1987 s{^(http:[-\w/#~:.+=&%@!]+)(\?.*)$}{<A HREF="$1$2">$1</A>}i ){
2a28b791 1988 $url = "<A HREF=\"$url\">$url</A>";
1989 }
1990 return $url;
54310121 1991}
1992
19931;